#define PETSCMAT_DLL

#include "../src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
#include "petscblaslapack.h"

EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);}
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++) {
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
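/*
   Illustrative sketch (editorial note, not part of the library): because
   colmap stores local offsets shifted by +1, a stored 0 doubles as the
   "this block column is not present in B" sentinel.  Decoding a global
   point column gcol in the array variant therefore looks like

     PetscInt lcol = baij->colmap[gcol/bs] - 1;
     if (lcol < 0)  ... block column absent from B ...
     else           lcol += gcol % bs;              point column within B

   which is exactly the decoding performed in MatSetValues_MPIBAIJ() below.
*/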
#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
a_noinsert:; \
    ailen[brow] = nrow; \
  }

#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
  { \
    brow = row/bs; \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    CHKMEMQ; \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
b_noinsert:; \
    bilen[brow] = nrow; \
  }

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba=b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
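/*
   Caller-side usage sketch (editorial; grow, gcol and val are made-up names,
   the calls are the standard public interface): locally owned rows are
   inserted directly into baij->A/baij->B by the macros above, while rows
   owned elsewhere are queued in mat->stash and only communicated at
   assembly time:

     ierr = MatSetValues(mat,1,&grow,1,&gcol,&val,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);  ships the stash
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);    inserts received values
*/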
"MatSetValuesBlocked_MPIBAIJ" 277 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 278 { 279 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 280 const PetscScalar *value; 281 MatScalar *barray=baij->barray; 282 PetscTruth roworiented = baij->roworiented; 283 PetscErrorCode ierr; 284 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 285 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 286 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 287 288 PetscFunctionBegin; 289 if(!barray) { 290 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 291 baij->barray = barray; 292 } 293 294 if (roworiented) { 295 stepval = (n-1)*bs; 296 } else { 297 stepval = (m-1)*bs; 298 } 299 for (i=0; i<m; i++) { 300 if (im[i] < 0) continue; 301 #if defined(PETSC_USE_DEBUG) 302 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 303 #endif 304 if (im[i] >= rstart && im[i] < rend) { 305 row = im[i] - rstart; 306 for (j=0; j<n; j++) { 307 /* If NumCol = 1 then a copy is not required */ 308 if ((roworiented) && (n == 1)) { 309 barray = (MatScalar*)v + i*bs2; 310 } else if((!roworiented) && (m == 1)) { 311 barray = (MatScalar*)v + j*bs2; 312 } else { /* Here a copy is required */ 313 if (roworiented) { 314 value = v + (i*(stepval+bs) + j)*bs; 315 } else { 316 value = v + (j*(stepval+bs) + i)*bs; 317 } 318 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 319 for (jj=0; jj<bs; jj++) { 320 barray[jj] = value[jj]; 321 } 322 barray += bs; 323 } 324 barray -= bs2; 325 } 326 327 if (in[j] >= cstart && in[j] < cend){ 328 col = in[j] - cstart; 329 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 330 } 331 else if (in[j] < 0) continue; 332 #if defined(PETSC_USE_DEBUG) 333 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 334 #endif 335 else { 336 if (mat->was_assembled) { 337 if (!baij->colmap) { 338 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 339 } 340 341 #if defined(PETSC_USE_DEBUG) 342 #if defined (PETSC_USE_CTABLE) 343 { PetscInt data; 344 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 345 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 346 } 347 #else 348 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 349 #endif 350 #endif 351 #if defined (PETSC_USE_CTABLE) 352 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 353 col = (col - 1)/bs; 354 #else 355 col = (baij->colmap[in[j]] - 1)/bs; 356 #endif 357 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 358 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 359 col = in[j]; 360 } 361 } 362 else col = in[j]; 363 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 364 } 365 } 366 } else { 367 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 368 if (!baij->donotstash) { 369 if (roworiented) { 370 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 371 } else { 372 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 373 } 374 } 375 } 376 } 377 PetscFunctionReturn(0); 378 } 379 380 #define HASH_KEY 0.6180339887 381 
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx] + (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx] + (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth        roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /* value = v + i*(stepval+bs)*bs + j*bs; */
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum  = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

/*
  Creates the hash table, and sets the table
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
  ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
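/*
   Probe-sequence sketch (editorial): insertion above and lookup in the *_HT
   routines scan linearly from h1 = HASH(ht_size,key,tmp), wrapping at the
   end of the table, until an empty slot (insert) or a matching key (lookup)
   is found; the equivalent combined loop is

     for (k=0; k<ht_size; k++) {
       idx = (h1+k)%ht_size;
       if (!HT[idx] || HT[idx] == key) break;
     }

   The table is only built when MatSetOption(mat,MAT_USE_HASH_TABLE,...) has
   been requested; the oversizing factor used above (baij->ht_fact) is
   presumably adjusted through the composed "MatSetHashTableFactor_C" routine
   seen in MatDestroy_MPIBAIJ() below.
*/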
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscTruth     r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}
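/*
   Note on the MPI_PROD reduction in MatAssemblyEnd_MPIBAIJ() above
   (editorial): each rank contributes mat->was_assembled, so the product
   other_disassembled is 0 as soon as any rank is no longer assembled;
   e.g. on three ranks

     was_assembled = {1,1,0}  ->  product = 0  ->  the still-assembled
     ranks call DisAssemble_MPIBAIJ()

   so that all ranks reassemble from a consistent (disassembled) state.
*/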
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap->bs;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_Binary"
static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,count,j,k,bs2=a->bs2,header[4],nz,rlen;
  PetscInt       *range,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
  int            fd;
  PetscScalar    *column_values;
  FILE           *file;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = bs2*(A->nz + B->nz);
  rlen = mat->rmap->n;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }

  ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
  /* compute lengths of each row */
  count = 0;
  for (i=0; i<a->mbs; i++) {
    crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }
  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
    rlen = (range[1] - range[0])/bs;
    for (i=0; i<rlen; i++) {
      for (j=0; j<bs; j++) {
        row_lens[i*bs+j] = bs*crow_lens[i];
      }
    }
    ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = (range[i+1] - range[i])/bs;
      ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      for (k=0; k<rlen; k++) {
        for (j=0; j<bs; j++) {
          row_lens[k*bs+j] = bs*crow_lens[k];
        }
      }
      ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscFree(row_lens);CHKERRQ(ierr);
  } else {
    ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(crow_lens);CHKERRQ(ierr);

  /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
     information needed to make it for each row from a block row. This does require more communication but still not more than
     the communication needed for the nonzero values */
  nzmax = nz; /* space a largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<a->mbs; i++) {
    pcnt = cnt;
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*col+l;
      }
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
      }
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*garray[B->j[j]]+l;
      }
    }
    len = cnt - pcnt;
    for (k=1; k<bs; k++) {
      ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
      cnt += len;
    }
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the columns to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the numerical values */
  ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<a->mbs; i++) {
    rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    cnt += (bs-1)*rlen;
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  if (baij->A) {ierr = MatDestroy(baij->A);CHKERRQ(ierr);}
  if (baij->B) {ierr = MatDestroy(baij->B);CHKERRQ(ierr);}
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
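/*
   MatMult_MPIBAIJ() above follows the standard PETSc communication/
   computation overlap (a descriptive restatement, no new behavior):

     VecScatterBegin(...)    start gathering off-process entries of xx
     y  = A_diag * x_local   diagonal block needs only local data
     VecScatterEnd(...)      finish the gather into a->lvec
     y += B_offdiag * lvec   fold in the off-diagonal contribution

   so the messages for a->lvec are in flight while the diagonal-block
   product runs.
*/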
#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation */
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
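/*
   Sketch of the column merge in MatGetRow_MPIBAIJ() above, with made-up
   numbers and bs = 1 for readability: suppose cstart = 10, the A part of
   the row has local columns {0,2} (global {10,12}), and the B part maps
   through cmap to global columns {3,7,15}.  The returned row is then

     idx = { 3, 7,   10, 12,   15 }
           B < cstart  A part   remaining B

   i.e. the three already-sorted pieces concatenate into one sorted row.
*/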
isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1541 isend[3] += info->memory; isend[4] += info->mallocs; 1542 if (flag == MAT_LOCAL) { 1543 info->nz_used = isend[0]; 1544 info->nz_allocated = isend[1]; 1545 info->nz_unneeded = isend[2]; 1546 info->memory = isend[3]; 1547 info->mallocs = isend[4]; 1548 } else if (flag == MAT_GLOBAL_MAX) { 1549 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1550 info->nz_used = irecv[0]; 1551 info->nz_allocated = irecv[1]; 1552 info->nz_unneeded = irecv[2]; 1553 info->memory = irecv[3]; 1554 info->mallocs = irecv[4]; 1555 } else if (flag == MAT_GLOBAL_SUM) { 1556 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1557 info->nz_used = irecv[0]; 1558 info->nz_allocated = irecv[1]; 1559 info->nz_unneeded = irecv[2]; 1560 info->memory = irecv[3]; 1561 info->mallocs = irecv[4]; 1562 } else { 1563 SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1564 } 1565 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1566 info->fill_ratio_needed = 0; 1567 info->factor_mallocs = 0; 1568 PetscFunctionReturn(0); 1569 } 1570 1571 #undef __FUNCT__ 1572 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1573 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg) 1574 { 1575 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1576 PetscErrorCode ierr; 1577 1578 PetscFunctionBegin; 1579 switch (op) { 1580 case MAT_NEW_NONZERO_LOCATIONS: 1581 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1582 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1583 case MAT_KEEP_NONZERO_PATTERN: 1584 case MAT_NEW_NONZERO_LOCATION_ERR: 1585 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1586 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1587 break; 1588 case MAT_ROW_ORIENTED: 1589 a->roworiented = flg; 1590 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1591 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1592 break; 1593 case MAT_NEW_DIAGONALS: 1594 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1595 break; 1596 case MAT_IGNORE_OFF_PROC_ENTRIES: 1597 a->donotstash = flg; 1598 break; 1599 case MAT_USE_HASH_TABLE: 1600 a->ht_flag = flg; 1601 break; 1602 case MAT_SYMMETRIC: 1603 case MAT_STRUCTURALLY_SYMMETRIC: 1604 case MAT_HERMITIAN: 1605 case MAT_SYMMETRY_ETERNAL: 1606 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1607 break; 1608 default: 1609 SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op); 1610 } 1611 PetscFunctionReturn(0); 1612 } 1613 1614 #undef __FUNCT__ 1615 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1616 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1617 { 1618 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1619 Mat_SeqBAIJ *Aloc; 1620 Mat B; 1621 PetscErrorCode ierr; 1622 PetscInt M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1623 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1624 MatScalar *a; 1625 1626 PetscFunctionBegin; 1627 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1628 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1629 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1630 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1631 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1632 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1633 } else { 1634 B = *matout; 1635
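/* reusing the caller-supplied matrix; its existing preallocation is assumed to accommodate the transposed nonzero pattern */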
} 1636 1637 /* copy over the A part */ 1638 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1639 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1640 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1641 1642 for (i=0; i<mbs; i++) { 1643 rvals[0] = bs*(baij->rstartbs + i); 1644 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1645 for (j=ai[i]; j<ai[i+1]; j++) { 1646 col = (baij->cstartbs+aj[j])*bs; 1647 for (k=0; k<bs; k++) { 1648 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1649 col++; a += bs; 1650 } 1651 } 1652 } 1653 /* copy over the B part */ 1654 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1655 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1656 for (i=0; i<mbs; i++) { 1657 rvals[0] = bs*(baij->rstartbs + i); 1658 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1659 for (j=ai[i]; j<ai[i+1]; j++) { 1660 col = baij->garray[aj[j]]*bs; 1661 for (k=0; k<bs; k++) { 1662 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1663 col++; a += bs; 1664 } 1665 } 1666 } 1667 ierr = PetscFree(rvals);CHKERRQ(ierr); 1668 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1669 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1670 1671 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1672 *matout = B; 1673 } else { 1674 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1675 } 1676 PetscFunctionReturn(0); 1677 } 1678 1679 #undef __FUNCT__ 1680 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1681 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1682 { 1683 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1684 Mat a = baij->A,b = baij->B; 1685 PetscErrorCode ierr; 1686 PetscInt s1,s2,s3; 1687 1688 PetscFunctionBegin; 1689 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1690 if (rr) { 1691 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1692 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1693 /* Overlap communication with computation. 
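The forward scatter of rr into the ghosted work vector lvec is started below and completed only after the diagonal block has been scaled, so the communication overlaps the local scaling work.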
*/ 1694 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1695 } 1696 if (ll) { 1697 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1698 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1699 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1700 } 1701 /* scale the diagonal block */ 1702 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1703 1704 if (rr) { 1705 /* Do a scatter end and then right scale the off-diagonal block */ 1706 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1707 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1708 } 1709 1710 PetscFunctionReturn(0); 1711 } 1712 1713 #undef __FUNCT__ 1714 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1715 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 1716 { 1717 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1718 PetscErrorCode ierr; 1719 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1720 PetscInt i,*owners = A->rmap->range; 1721 PetscInt *nprocs,j,idx,nsends,row; 1722 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1723 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1724 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap->rstart; 1725 MPI_Comm comm = ((PetscObject)A)->comm; 1726 MPI_Request *send_waits,*recv_waits; 1727 MPI_Status recv_status,*send_status; 1728 #if defined(PETSC_DEBUG) 1729 PetscTruth found = PETSC_FALSE; 1730 #endif 1731 1732 PetscFunctionBegin; 1733 /* first count number of contributors to each processor */ 1734 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1735 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1736 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1737 j = 0; 1738 for (i=0; i<N; i++) { 1739 if (lastidx > (idx = rows[i])) j = 0; 1740 lastidx = idx; 1741 for (; j<size; j++) { 1742 if (idx >= owners[j] && idx < owners[j+1]) { 1743 nprocs[2*j]++; 1744 nprocs[2*j+1] = 1; 1745 owner[i] = j; 1746 #if defined(PETSC_DEBUG) 1747 found = PETSC_TRUE; 1748 #endif 1749 break; 1750 } 1751 } 1752 #if defined(PETSC_DEBUG) 1753 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1754 found = PETSC_FALSE; 1755 #endif 1756 } 1757 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1758 1759 if (A->nooffproczerorows) { 1760 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 1761 nrecvs = nsends; 1762 nmax = N; 1763 } else { 1764 /* inform other processors of number of messages and max length*/ 1765 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1766 } 1767 1768 /* post receives: */ 1769 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1770 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1771 for (i=0; i<nrecvs; i++) { 1772 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1773 } 1774 1775 /* do sends: 1776 1) starts[i] gives the starting index in svalues for stuff going to 1777 the ith processor 1778 */ 1779 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1780 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1781 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1782 
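/* starts[] serves double duty: first as running insertion cursors while svalues is packed by owning process, then rebuilt below as the offsets handed to MPI_Isend() */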
starts[0] = 0; 1783 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1784 for (i=0; i<N; i++) { 1785 svalues[starts[owner[i]]++] = rows[i]; 1786 } 1787 1788 starts[0] = 0; 1789 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1790 count = 0; 1791 for (i=0; i<size; i++) { 1792 if (nprocs[2*i+1]) { 1793 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1794 } 1795 } 1796 ierr = PetscFree(starts);CHKERRQ(ierr); 1797 1798 base = owners[rank]; 1799 1800 /* wait on receives */ 1801 ierr = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr); 1802 count = nrecvs; 1803 slen = 0; 1804 while (count) { 1805 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1806 /* unpack receives into our local space */ 1807 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1808 source[imdex] = recv_status.MPI_SOURCE; 1809 lens[imdex] = n; 1810 slen += n; 1811 count--; 1812 } 1813 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1814 1815 /* move the received row indices into the local list of rows to zero */ 1816 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1817 count = 0; 1818 for (i=0; i<nrecvs; i++) { 1819 values = rvalues + i*nmax; 1820 for (j=0; j<lens[i]; j++) { 1821 lrows[count++] = values[j] - base; 1822 } 1823 } 1824 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1825 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1826 ierr = PetscFree(owner);CHKERRQ(ierr); 1827 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1828 1829 /* actually zap the local rows */ 1830 /* 1831 Zero the required rows. If the "diagonal block" of the matrix 1832 is square and the user wishes to set the diagonal, we use separate 1833 code so that MatSetValues() is not called for each diagonal entry; 1834 that would allocate new memory on every insertion, triggering many mallocs and slowing things down.
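When the diagonal block is not square, the diagonal entries are instead inserted one at a time with MatSetValues() below and the matrix is reassembled.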
1835 1836 */ 1837 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1838 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr); 1839 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1840 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr); 1841 } else if (diag != 0.0) { 1842 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1843 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1844 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1845 for (i=0; i<slen; i++) { 1846 row = lrows[i] + rstart_bs; 1847 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1848 } 1849 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1850 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1851 } else { 1852 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1853 } 1854 1855 ierr = PetscFree(lrows);CHKERRQ(ierr); 1856 1857 /* wait on sends */ 1858 if (nsends) { 1859 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1860 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1861 ierr = PetscFree(send_status);CHKERRQ(ierr); 1862 } 1863 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1864 ierr = PetscFree(svalues);CHKERRQ(ierr); 1865 1866 PetscFunctionReturn(0); 1867 } 1868 1869 #undef __FUNCT__ 1870 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1871 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1872 { 1873 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1874 PetscErrorCode ierr; 1875 1876 PetscFunctionBegin; 1877 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1878 PetscFunctionReturn(0); 1879 } 1880 1881 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1882 1883 #undef __FUNCT__ 1884 #define __FUNCT__ "MatEqual_MPIBAIJ" 1885 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1886 { 1887 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1888 Mat a,b,c,d; 1889 PetscTruth flg; 1890 PetscErrorCode ierr; 1891 1892 PetscFunctionBegin; 1893 a = matA->A; b = matA->B; 1894 c = matB->A; d = matB->B; 1895 1896 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1897 if (flg) { 1898 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1899 } 1900 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1901 PetscFunctionReturn(0); 1902 } 1903 1904 #undef __FUNCT__ 1905 #define __FUNCT__ "MatCopy_MPIBAIJ" 1906 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1907 { 1908 PetscErrorCode ierr; 1909 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1910 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1911 1912 PetscFunctionBegin; 1913 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
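When a fast copy is not possible we fall back on MatCopy_Basic(), which transfers the values generically, one row at a time, through the public MatGetRow()/MatSetValues() interface.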
*/ 1914 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1915 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1916 } else { 1917 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1918 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1919 } 1920 PetscFunctionReturn(0); 1921 } 1922 1923 #undef __FUNCT__ 1924 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1925 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1926 { 1927 PetscErrorCode ierr; 1928 1929 PetscFunctionBegin; 1930 ierr = MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1931 PetscFunctionReturn(0); 1932 } 1933 1934 #undef __FUNCT__ 1935 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1936 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1937 { 1938 PetscErrorCode ierr; 1939 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1940 PetscBLASInt bnz,one=1; 1941 Mat_SeqBAIJ *x,*y; 1942 1943 PetscFunctionBegin; 1944 if (str == SAME_NONZERO_PATTERN) { 1945 PetscScalar alpha = a; 1946 x = (Mat_SeqBAIJ *)xx->A->data; 1947 y = (Mat_SeqBAIJ *)yy->A->data; 1948 bnz = PetscBLASIntCast(x->nz); 1949 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1950 x = (Mat_SeqBAIJ *)xx->B->data; 1951 y = (Mat_SeqBAIJ *)yy->B->data; 1952 bnz = PetscBLASIntCast(x->nz); 1953 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1954 } else { 1955 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1956 } 1957 PetscFunctionReturn(0); 1958 } 1959 1960 #undef __FUNCT__ 1961 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ" 1962 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs) 1963 { 1964 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1965 PetscInt rbs,cbs; 1966 PetscErrorCode ierr; 1967 1968 PetscFunctionBegin; 1969 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1970 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1971 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 1972 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 1973 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 1974 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 1975 PetscFunctionReturn(0); 1976 } 1977 1978 #undef __FUNCT__ 1979 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1980 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1981 { 1982 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1983 PetscErrorCode ierr; 1984 1985 PetscFunctionBegin; 1986 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1987 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1988 PetscFunctionReturn(0); 1989 } 1990 1991 #undef __FUNCT__ 1992 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 1993 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1994 { 1995 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1996 PetscErrorCode ierr; 1997 1998 PetscFunctionBegin; 1999 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2000 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2001 PetscFunctionReturn(0); 2002 } 2003 2004 #undef __FUNCT__ 2005 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 2006 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2007 { 2008 PetscErrorCode ierr; 2009 IS iscol_local; 2010 PetscInt csize; 2011 2012 PetscFunctionBegin; 2013 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2014 if (call == MAT_REUSE_MATRIX) { 2015 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2016 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used 
before, cannot reuse"); 2017 } else { 2018 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2019 } 2020 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2021 if (call == MAT_INITIAL_MATRIX) { 2022 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2023 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 2024 } 2025 PetscFunctionReturn(0); 2026 } 2027 2028 #undef __FUNCT__ 2029 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 2030 /* 2031 Not great since it makes two copies of the submatrix, first an SeqBAIJ 2032 in local and then by concatenating the local matrices the end result. 2033 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 2034 */ 2035 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2036 { 2037 PetscErrorCode ierr; 2038 PetscMPIInt rank,size; 2039 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 2040 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2041 Mat *local,M,Mreuse; 2042 MatScalar *vwork,*aa; 2043 MPI_Comm comm = ((PetscObject)mat)->comm; 2044 Mat_SeqBAIJ *aij; 2045 2046 2047 PetscFunctionBegin; 2048 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2049 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2050 2051 if (call == MAT_REUSE_MATRIX) { 2052 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2053 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2054 local = &Mreuse; 2055 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2056 } else { 2057 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2058 Mreuse = *local; 2059 ierr = PetscFree(local);CHKERRQ(ierr); 2060 } 2061 2062 /* 2063 m - number of local rows 2064 n - number of columns (same on all processors) 2065 rstart - first row in new global matrix generated 2066 */ 2067 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2068 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2069 m = m/bs; 2070 n = n/bs; 2071 2072 if (call == MAT_INITIAL_MATRIX) { 2073 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2074 ii = aij->i; 2075 jj = aij->j; 2076 2077 /* 2078 Determine the number of non-zeros in the diagonal and off-diagonal 2079 portions of the matrix in order to do correct preallocation 2080 */ 2081 2082 /* first get start and end of "diagonal" columns */ 2083 if (csize == PETSC_DECIDE) { 2084 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2085 if (mglobal == n*bs) { /* square matrix */ 2086 nlocal = m; 2087 } else { 2088 nlocal = n/size + ((n % size) > rank); 2089 } 2090 } else { 2091 nlocal = csize/bs; 2092 } 2093 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2094 rstart = rend - nlocal; 2095 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2096 2097 /* next, compute all the lengths */ 2098 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2099 olens = dlens + m; 2100 for (i=0; i<m; i++) { 2101 jend = ii[i+1] - ii[i]; 2102 olen = 0; 2103 dlen = 0; 2104 for (j=0; j<jend; j++) { 2105 if (*jj < rstart || *jj >= rend) olen++; 2106 else dlen++; 2107 jj++; 2108 } 2109 olens[i] = olen; 2110 dlens[i] = dlen; 2111 } 2112 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2113 ierr = 
MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2114 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2115 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2116 ierr = PetscFree(dlens);CHKERRQ(ierr); 2117 } else { 2118 PetscInt ml,nl; 2119 2120 M = *newmat; 2121 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2122 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2123 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2124 /* 2125 The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly, 2126 rather than the slower MatSetValues(). 2127 */ 2128 M->was_assembled = PETSC_TRUE; 2129 M->assembled = PETSC_FALSE; 2130 } 2131 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2132 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2133 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2134 ii = aij->i; 2135 jj = aij->j; 2136 aa = aij->a; 2137 for (i=0; i<m; i++) { 2138 row = rstart/bs + i; 2139 nz = ii[i+1] - ii[i]; 2140 cwork = jj; jj += nz; 2141 vwork = aa; aa += nz; 2142 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2143 } 2144 2145 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2146 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2147 *newmat = M; 2148 2149 /* save submatrix used in processor for next request */ 2150 if (call == MAT_INITIAL_MATRIX) { 2151 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2152 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2153 } 2154 2155 PetscFunctionReturn(0); 2156 } 2157 2158 #undef __FUNCT__ 2159 #define __FUNCT__ "MatPermute_MPIBAIJ" 2160 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2161 { 2162 MPI_Comm comm,pcomm; 2163 PetscInt first,local_size,nrows; 2164 const PetscInt *rows; 2165 PetscMPIInt size; 2166 IS crowp,growp,irowp,lrowp,lcolp,icolp; 2167 PetscErrorCode ierr; 2168 2169 PetscFunctionBegin; 2170 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2171 /* make a collective version of 'rowp' */ 2172 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2173 if (pcomm==comm) { 2174 crowp = rowp; 2175 } else { 2176 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2177 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2178 ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr); 2179 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2180 } 2181 /* collect the global row permutation and invert it */ 2182 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 2183 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 2184 if (pcomm!=comm) { 2185 ierr = ISDestroy(crowp);CHKERRQ(ierr); 2186 } 2187 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2188 /* get the local target indices */ 2189 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 2190 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 2191 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2192 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr); 2193 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 2194 ierr = ISDestroy(irowp);CHKERRQ(ierr); 2195 /* the column permutation is so much easier; 2196 make a local version of 'colp' and invert it */ 2197 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2198 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2199 if (size==1) { 2200 lcolp = colp; 2201 } else { 2202 ierr = 
ISGetSize(colp,&nrows);CHKERRQ(ierr); 2203 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 2204 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr); 2205 } 2206 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2207 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 2208 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 2209 if (size>1) { 2210 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 2211 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 2212 } 2213 /* now we just get the submatrix */ 2214 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2215 /* clean up */ 2216 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 2217 ierr = ISDestroy(icolp);CHKERRQ(ierr); 2218 PetscFunctionReturn(0); 2219 } 2220 2221 #undef __FUNCT__ 2222 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2223 PetscErrorCode PETSCMAT_DLLEXPORT MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2224 { 2225 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2226 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2227 2228 PetscFunctionBegin; 2229 if (nghosts) { *nghosts = B->nbs;} 2230 if (ghosts) {*ghosts = baij->garray;} 2231 PetscFunctionReturn(0); 2232 } 2233 2234 EXTERN PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat); 2235 2236 #undef __FUNCT__ 2237 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ" 2238 /* 2239 This routine is almost identical to MatFDColoringCreate_MPIAIJ()! 2240 */ 2241 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 2242 { 2243 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2244 PetscErrorCode ierr; 2245 PetscMPIInt size,*ncolsonproc,*disp,nn; 2246 PetscInt bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col; 2247 const PetscInt *is; 2248 PetscInt nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj; 2249 PetscInt *rowhit,M,cstart,cend,colb; 2250 PetscInt *columnsforrow,l; 2251 IS *isa; 2252 PetscTruth done,flg; 2253 ISLocalToGlobalMapping map = mat->bmapping; 2254 PetscInt *ltog = (map ? 
map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype; 2255 2256 PetscFunctionBegin; 2257 if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2258 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2259 2260 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2261 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2262 M = mat->rmap->n/bs; 2263 cstart = mat->cmap->rstart/bs; 2264 cend = mat->cmap->rend/bs; 2265 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2266 c->N = mat->cmap->N/bs; 2267 c->m = mat->rmap->n/bs; 2268 c->rstart = mat->rmap->rstart/bs; 2269 2270 c->ncolors = nis; 2271 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2272 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2273 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2274 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2275 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2276 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2277 2278 /* Allow access to data structures of local part of matrix */ 2279 if (!baij->colmap) { 2280 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2281 } 2282 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2283 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2284 2285 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2286 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2287 2288 for (i=0; i<nis; i++) { 2289 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2290 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2291 c->ncolumns[i] = n; 2292 if (n) { 2293 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2294 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2295 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2296 } else { 2297 c->columns[i] = 0; 2298 } 2299 2300 if (ctype == IS_COLORING_GLOBAL){ 2301 /* Determine the total (parallel) number of columns of this color */ 2302 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 2303 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 2304 2305 nn = PetscMPIIntCast(n); 2306 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2307 nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];} 2308 if (!nctot) { 2309 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2310 } 2311 2312 disp[0] = 0; 2313 for (j=1; j<size; j++) { 2314 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2315 } 2316 2317 /* Get complete list of columns for color on each processor */ 2318 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2319 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2320 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 2321 } else if (ctype == IS_COLORING_GHOSTED){ 2322 /* Determine local number of columns of this color on this process, including ghost points */ 2323 nctot = n; 2324 ierr = 
PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2325 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2326 } else { 2327 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2328 } 2329 2330 /* 2331 Mark all rows affected by these columns 2332 */ 2333 /* Temporary option to allow for debugging/testing */ 2334 flg = PETSC_FALSE; 2335 ierr = PetscOptionsGetTruth(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr); 2336 if (!flg) {/*-----------------------------------------------------------------------------*/ 2337 /* crude, fast version */ 2338 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2339 /* loop over columns*/ 2340 for (j=0; j<nctot; j++) { 2341 if (ctype == IS_COLORING_GHOSTED) { 2342 col = ltog[cols[j]]; 2343 } else { 2344 col = cols[j]; 2345 } 2346 if (col >= cstart && col < cend) { 2347 /* column is in diagonal block of matrix */ 2348 rows = A_cj + A_ci[col-cstart]; 2349 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2350 } else { 2351 #if defined (PETSC_USE_CTABLE) 2352 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2353 colb --; 2354 #else 2355 colb = baij->colmap[col] - 1; 2356 #endif 2357 if (colb == -1) { 2358 m = 0; 2359 } else { 2360 colb = colb/bs; 2361 rows = B_cj + B_ci[colb]; 2362 m = B_ci[colb+1] - B_ci[colb]; 2363 } 2364 } 2365 /* loop over the rows of this column, marking them in rowhit */ 2366 for (k=0; k<m; k++) { 2367 rowhit[*rows++] = col + 1; 2368 } 2369 } 2370 2371 /* count the number of hits */ 2372 nrows = 0; 2373 for (j=0; j<M; j++) { 2374 if (rowhit[j]) nrows++; 2375 } 2376 c->nrows[i] = nrows; 2377 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2378 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2379 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2380 nrows = 0; 2381 for (j=0; j<M; j++) { 2382 if (rowhit[j]) { 2383 c->rows[i][nrows] = j; 2384 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2385 nrows++; 2386 } 2387 } 2388 } else {/*-------------------------------------------------------------------------------*/ 2389 /* slow version, using rowhit as a linked list */ 2390 PetscInt currentcol,fm,mfm; 2391 rowhit[M] = M; 2392 nrows = 0; 2393 /* loop over columns*/ 2394 for (j=0; j<nctot; j++) { 2395 if (ctype == IS_COLORING_GHOSTED) { 2396 col = ltog[cols[j]]; 2397 } else { 2398 col = cols[j]; 2399 } 2400 if (col >= cstart && col < cend) { 2401 /* column is in diagonal block of matrix */ 2402 rows = A_cj + A_ci[col-cstart]; 2403 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2404 } else { 2405 #if defined (PETSC_USE_CTABLE) 2406 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2407 colb --; 2408 #else 2409 colb = baij->colmap[col] - 1; 2410 #endif 2411 if (colb == -1) { 2412 m = 0; 2413 } else { 2414 colb = colb/bs; 2415 rows = B_cj + B_ci[colb]; 2416 m = B_ci[colb+1] - B_ci[colb]; 2417 } 2418 } 2419 2420 /* loop over the rows of this column, marking them in rowhit */ 2421 fm = M; /* fm points to first entry in linked list */ 2422 for (k=0; k<m; k++) { 2423 currentcol = *rows++; 2424 /* is it already in the list? 
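rowhit[] doubles as a singly linked list threaded from the sentinel rowhit[M], keeping the hit rows in increasing order, so the scan can resume from the previous position.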
*/ 2425 do { 2426 mfm = fm; 2427 fm = rowhit[fm]; 2428 } while (fm < currentcol); 2429 /* not in list so add it */ 2430 if (fm != currentcol) { 2431 nrows++; 2432 columnsforrow[currentcol] = col; 2433 /* next three lines insert new entry into linked list */ 2434 rowhit[mfm] = currentcol; 2435 rowhit[currentcol] = fm; 2436 fm = currentcol; 2437 /* fm points to present position in list since we know the columns are sorted */ 2438 } else { 2439 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected"); 2440 } 2441 } 2442 } 2443 c->nrows[i] = nrows; 2444 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2445 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2446 ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2447 /* now store the linked list of rows into c->rows[i] */ 2448 nrows = 0; 2449 fm = rowhit[M]; 2450 do { 2451 c->rows[i][nrows] = fm; 2452 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 2453 fm = rowhit[fm]; 2454 } while (fm < M); 2455 } /* ---------------------------------------------------------------------------------------*/ 2456 ierr = PetscFree(cols);CHKERRQ(ierr); 2457 } 2458 2459 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 2460 /* 2461 vscale will contain the "diagonal" on-processor scalings followed by the off-processor ones 2462 */ 2463 if (ctype == IS_COLORING_GLOBAL) { 2464 PetscInt *garray; 2465 ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2466 for (i=0; i<baij->B->cmap->n/bs; i++) { 2467 for (j=0; j<bs; j++) { 2468 garray[i*bs+j] = bs*baij->garray[i]+j; 2469 } 2470 } 2471 ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr); 2472 ierr = PetscFree(garray);CHKERRQ(ierr); 2473 CHKMEMQ; 2474 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2475 for (k=0; k<c->ncolors; k++) { 2476 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2477 for (l=0; l<c->nrows[k]; l++) { 2478 col = c->columnsforrow[k][l]; 2479 if (col >= cstart && col < cend) { 2480 /* column is in diagonal block of matrix */ 2481 colb = col - cstart; 2482 } else { 2483 /* column is in "off-processor" part */ 2484 #if defined (PETSC_USE_CTABLE) 2485 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2486 colb --; 2487 #else 2488 colb = baij->colmap[col] - 1; 2489 #endif 2490 colb = colb/bs; 2491 colb += cend - cstart; 2492 } 2493 c->vscaleforrow[k][l] = colb; 2494 } 2495 } 2496 } else if (ctype == IS_COLORING_GHOSTED) { 2497 /* Get gtol mapping */ 2498 PetscInt N = mat->cmap->N, *gtol; 2499 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr); 2500 for (i=0; i<N; i++) gtol[i] = -1; 2501 for (i=0; i<map->n; i++) gtol[ltog[i]] = i; 2502 2503 c->vscale = 0; /* will be created in MatFDColoringApply() */ 2504 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2505 for (k=0; k<c->ncolors; k++) { 2506 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2507 for (l=0; l<c->nrows[k]; l++) { 2508 col = c->columnsforrow[k][l]; /* global column index */ 2509 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 2510 } 2511 } 2512 ierr = PetscFree(gtol);CHKERRQ(ierr); 2513 } 2514 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2515 2516 ierr = PetscFree(rowhit);CHKERRQ(ierr); 2517 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 2518 
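/* finished with the compressed column views of the diagonal and off-diagonal blocks obtained with MatGetColumnIJ() above */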
ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2519 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2520 CHKMEMQ; 2521 PetscFunctionReturn(0); 2522 } 2523 2524 #undef __FUNCT__ 2525 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ" 2526 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat) 2527 { 2528 Mat B; 2529 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2530 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2531 Mat_SeqAIJ *b; 2532 PetscErrorCode ierr; 2533 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2534 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2535 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2536 2537 PetscFunctionBegin; 2538 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2539 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2540 2541 /* ---------------------------------------------------------------- 2542 Tell every processor the number of nonzeros per row 2543 */ 2544 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2545 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2546 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2547 } 2548 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2549 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2550 displs = recvcounts + size; 2551 for (i=0; i<size; i++) { 2552 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2553 displs[i] = A->rmap->range[i]/bs; 2554 } 2555 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2556 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2557 #else 2558 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2559 #endif 2560 /* --------------------------------------------------------------- 2561 Create the sequential matrix of the same type as the local block diagonal 2562 */ 2563 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2564 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2565 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2566 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2567 b = (Mat_SeqAIJ *)B->data; 2568 2569 /*-------------------------------------------------------------------- 2570 Copy my part of matrix column indices over 2571 */ 2572 sendcount = ad->nz + bd->nz; 2573 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2574 a_jsendbuf = ad->j; 2575 b_jsendbuf = bd->j; 2576 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2577 cnt = 0; 2578 for (i=0; i<n; i++) { 2579 2580 /* put in lower diagonal portion */ 2581 m = bd->i[i+1] - bd->i[i]; 2582 while (m > 0) { 2583 /* is it above diagonal (in bd (compressed) numbering) */ 2584 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2585 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2586 m--; 2587 } 2588 2589 /* put in diagonal portion */ 2590 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2591 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2592 } 2593 2594 /* put in upper diagonal portion */ 2595 while (m-- > 0) { 2596 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2597 } 2598 } 2599 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D 
actual nz %D",sendcount,cnt); 2600 2601 /*-------------------------------------------------------------------- 2602 Gather all column indices to all processors 2603 */ 2604 for (i=0; i<size; i++) { 2605 recvcounts[i] = 0; 2606 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2607 recvcounts[i] += lens[j]; 2608 } 2609 } 2610 displs[0] = 0; 2611 for (i=1; i<size; i++) { 2612 displs[i] = displs[i-1] + recvcounts[i-1]; 2613 } 2614 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2615 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2616 #else 2617 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2618 #endif 2619 /*-------------------------------------------------------------------- 2620 Assemble the matrix into useable form (note numerical values not yet set) 2621 */ 2622 /* set the b->ilen (length of each row) values */ 2623 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2624 /* set the b->i indices */ 2625 b->i[0] = 0; 2626 for (i=1; i<=A->rmap->N/bs; i++) { 2627 b->i[i] = b->i[i-1] + lens[i-1]; 2628 } 2629 ierr = PetscFree(lens);CHKERRQ(ierr); 2630 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2631 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2632 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2633 2634 if (A->symmetric){ 2635 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2636 } else if (A->hermitian) { 2637 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2638 } else if (A->structurally_symmetric) { 2639 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2640 } 2641 *newmat = B; 2642 PetscFunctionReturn(0); 2643 } 2644 2645 #undef __FUNCT__ 2646 #define __FUNCT__ "MatSOR_MPIBAIJ" 2647 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2648 { 2649 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2650 PetscErrorCode ierr; 2651 Vec bb1 = 0; 2652 2653 PetscFunctionBegin; 2654 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2655 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2656 } 2657 2658 if (flag == SOR_APPLY_UPPER) { 2659 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2660 PetscFunctionReturn(0); 2661 } 2662 2663 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2664 if (flag & SOR_ZERO_INITIAL_GUESS) { 2665 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2666 its--; 2667 } 2668 2669 while (its--) { 2670 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2671 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2672 2673 /* update rhs: bb1 = bb - B*x */ 2674 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2675 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2676 2677 /* local sweep */ 2678 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2679 } 2680 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 2681 if (flag & SOR_ZERO_INITIAL_GUESS) { 2682 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2683 its--; 2684 } 2685 while (its--) { 2686 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2687 ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2688 2689 /* update rhs: bb1 = bb - B*x */ 2690 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2691 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2692 2693 /* local sweep */ 2694 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2695 } 2696 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 2697 if (flag & SOR_ZERO_INITIAL_GUESS) { 2698 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2699 its--; 2700 } 2701 while (its--) { 2702 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2703 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2704 2705 /* update rhs: bb1 = bb - B*x */ 2706 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2707 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2708 2709 /* local sweep */ 2710 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2711 } 2712 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2713 2714 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 2715 PetscFunctionReturn(0); 2716 } 2717 2718 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2719 2720 2721 /* -------------------------------------------------------------------*/ 2722 static struct _MatOps MatOps_Values = { 2723 MatSetValues_MPIBAIJ, 2724 MatGetRow_MPIBAIJ, 2725 MatRestoreRow_MPIBAIJ, 2726 MatMult_MPIBAIJ, 2727 /* 4*/ MatMultAdd_MPIBAIJ, 2728 MatMultTranspose_MPIBAIJ, 2729 MatMultTransposeAdd_MPIBAIJ, 2730 0, 2731 0, 2732 0, 2733 /*10*/ 0, 2734 0, 2735 0, 2736 MatSOR_MPIBAIJ, 2737 MatTranspose_MPIBAIJ, 2738 /*15*/ MatGetInfo_MPIBAIJ, 2739 MatEqual_MPIBAIJ, 2740 MatGetDiagonal_MPIBAIJ, 2741 MatDiagonalScale_MPIBAIJ, 2742 MatNorm_MPIBAIJ, 2743 /*20*/ MatAssemblyBegin_MPIBAIJ, 2744 MatAssemblyEnd_MPIBAIJ, 2745 MatSetOption_MPIBAIJ, 2746 MatZeroEntries_MPIBAIJ, 2747 /*24*/ MatZeroRows_MPIBAIJ, 2748 0, 2749 0, 2750 0, 2751 0, 2752 /*29*/ MatSetUpPreallocation_MPIBAIJ, 2753 0, 2754 0, 2755 0, 2756 0, 2757 /*34*/ MatDuplicate_MPIBAIJ, 2758 0, 2759 0, 2760 0, 2761 0, 2762 /*39*/ MatAXPY_MPIBAIJ, 2763 MatGetSubMatrices_MPIBAIJ, 2764 MatIncreaseOverlap_MPIBAIJ, 2765 MatGetValues_MPIBAIJ, 2766 MatCopy_MPIBAIJ, 2767 /*44*/ 0, 2768 MatScale_MPIBAIJ, 2769 0, 2770 0, 2771 0, 2772 /*49*/ MatSetBlockSize_MPIBAIJ, 2773 0, 2774 0, 2775 0, 2776 0, 2777 /*54*/ MatFDColoringCreate_MPIBAIJ, 2778 0, 2779 MatSetUnfactored_MPIBAIJ, 2780 MatPermute_MPIBAIJ, 2781 MatSetValuesBlocked_MPIBAIJ, 2782 /*59*/ MatGetSubMatrix_MPIBAIJ, 2783 MatDestroy_MPIBAIJ, 2784 MatView_MPIBAIJ, 2785 0, 2786 0, 2787 /*64*/ 0, 2788 0, 2789 0, 2790 0, 2791 0, 2792 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2793 0, 2794 0, 2795 0, 2796 0, 2797 /*74*/ 0, 2798 MatFDColoringApply_BAIJ, 2799 0, 2800 0, 2801 0, 2802 /*79*/ 0, 2803 0, 2804 0, 2805 0, 2806 MatLoad_MPIBAIJ, 2807 /*84*/ 0, 2808 0, 2809 0, 2810 0, 2811 0, 2812 /*89*/ 0, 2813 0, 2814 0, 2815 0, 2816 0, 2817 /*94*/ 0, 2818 0, 2819 0, 2820 0, 2821 0, 2822 /*99*/ 0, 2823 0, 2824 0, 2825 0, 2826 0, 2827 /*104*/0, 2828 MatRealPart_MPIBAIJ, 2829 MatImaginaryPart_MPIBAIJ, 2830 0, 2831 0, 2832 /*109*/0, 2833 0, 2834 0, 2835 0, 2836 0, 2837 /*114*/MatGetSeqNonzerostructure_MPIBAIJ, 2838 0, 2839 MatGetGhosts_MPIBAIJ, 2840 0, 2841 0, 2842 /*119*/0, 2843 0, 2844 0, 2845 0 
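/* slots left as 0 have no MPIBAIJ implementation; invoking such an operation typically errors out as unsupported */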
2846 }; 2847 2848 EXTERN_C_BEGIN 2849 #undef __FUNCT__ 2850 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2851 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 2852 { 2853 PetscFunctionBegin; 2854 *a = ((Mat_MPIBAIJ *)A->data)->A; 2855 *iscopy = PETSC_FALSE; 2856 PetscFunctionReturn(0); 2857 } 2858 EXTERN_C_END 2859 2860 EXTERN_C_BEGIN 2861 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2862 EXTERN_C_END 2863 2864 EXTERN_C_BEGIN 2865 #undef __FUNCT__ 2866 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2867 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2868 { 2869 PetscInt m,rstart,cstart,cend; 2870 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2871 const PetscInt *JJ=0; 2872 PetscScalar *values=0; 2873 PetscErrorCode ierr; 2874 2875 PetscFunctionBegin; 2876 2877 if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2878 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2879 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2880 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2881 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2882 m = B->rmap->n/bs; 2883 rstart = B->rmap->rstart/bs; 2884 cstart = B->cmap->rstart/bs; 2885 cend = B->cmap->rend/bs; 2886 2887 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2888 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 2889 for (i=0; i<m; i++) { 2890 nz = ii[i+1] - ii[i]; 2891 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2892 nz_max = PetscMax(nz_max,nz); 2893 JJ = jj + ii[i]; 2894 for (j=0; j<nz; j++) { 2895 if (*JJ >= cstart) break; 2896 JJ++; 2897 } 2898 d = 0; 2899 for (; j<nz; j++) { 2900 if (*JJ++ >= cend) break; 2901 d++; 2902 } 2903 d_nnz[i] = d; 2904 o_nnz[i] = nz - d; 2905 } 2906 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2907 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2908 2909 values = (PetscScalar*)V; 2910 if (!values) { 2911 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2912 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2913 } 2914 for (i=0; i<m; i++) { 2915 PetscInt row = i + rstart; 2916 PetscInt ncols = ii[i+1] - ii[i]; 2917 const PetscInt *icols = jj + ii[i]; 2918 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2919 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2920 } 2921 2922 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2923 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2924 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2925 2926 PetscFunctionReturn(0); 2927 } 2928 EXTERN_C_END 2929 2930 #undef __FUNCT__ 2931 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2932 /*@C 2933 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2934 (block compressed row). 2935 2936 Collective on MPI_Comm 2937 2938 Input Parameters: 2939 + A - the matrix 2940 . bs - the block size 2941 . i - the indices into j for the start of each local row (starts with zero) 2942 . 
j - the column indices for each local row (starts with zero); these must be sorted for each row 2943 - v - optional values in the matrix 2944 2945 Level: developer 2946 2947 .keywords: matrix, baij, block compressed row, sparse, parallel 2948 2949 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MPIBAIJ 2950 @*/ 2951 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2952 { 2953 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 2954 2955 PetscFunctionBegin; 2956 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2957 if (f) { 2958 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 2959 } 2960 PetscFunctionReturn(0); 2961 } 2962 2963 EXTERN_C_BEGIN 2964 #undef __FUNCT__ 2965 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2966 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2967 { 2968 Mat_MPIBAIJ *b; 2969 PetscErrorCode ierr; 2970 PetscInt i, newbs = PetscAbs(bs); 2971 2972 PetscFunctionBegin; 2973 if (bs < 0) { 2974 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr); 2975 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2976 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2977 bs = PetscAbs(bs); 2978 } 2979 if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 2980 bs = newbs; 2981 2982 2983 if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2984 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2985 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2986 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2987 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2988 2989 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2990 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2991 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2992 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2993 2994 if (d_nnz) { 2995 for (i=0; i<B->rmap->n/bs; i++) { 2996 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2997 } 2998 } 2999 if (o_nnz) { 3000 for (i=0; i<B->rmap->n/bs; i++) { 3001 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3002 } 3003 } 3004 3005 b = (Mat_MPIBAIJ*)B->data; 3006 b->bs2 = bs*bs; 3007 b->mbs = B->rmap->n/bs; 3008 b->nbs = B->cmap->n/bs; 3009 b->Mbs = B->rmap->N/bs; 3010 b->Nbs = B->cmap->N/bs; 3011 3012 for (i=0; i<=b->size; i++) { 3013 b->rangebs[i] = B->rmap->range[i]/bs; 3014 } 3015 b->rstartbs = B->rmap->rstart/bs; 3016 b->rendbs = B->rmap->rend/bs; 3017 b->cstartbs = B->cmap->rstart/bs; 3018 b->cendbs = B->cmap->rend/bs; 3019 3020 if (!B->preallocated) { 3021 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3022 ierr = 
MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3023 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 3024 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3025 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3026 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3027 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 3028 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3029 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 3030 } 3031 3032 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3033 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 3034 B->preallocated = PETSC_TRUE; 3035 PetscFunctionReturn(0); 3036 } 3037 EXTERN_C_END 3038 3039 EXTERN_C_BEGIN 3040 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 3041 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 3042 EXTERN_C_END 3043 3044 3045 EXTERN_C_BEGIN 3046 #undef __FUNCT__ 3047 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 3048 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj) 3049 { 3050 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 3051 PetscErrorCode ierr; 3052 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 3053 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 3054 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 3055 3056 PetscFunctionBegin; 3057 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 3058 ii[0] = 0; 3059 CHKMEMQ; 3060 for (i=0; i<M; i++) { 3061 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 3062 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 3063 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 3064 /* remove one from the count if this row of the matrix has a diagonal entry */ 3065 for (j=id[i]; j<id[i+1]; j++) { 3066 if (jd[j] == i) {ii[i+1]--;break;} 3067 } 3068 CHKMEMQ; 3069 } 3070 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3071 cnt = 0; 3072 for (i=0; i<M; i++) { 3073 for (j=io[i]; j<io[i+1]; j++) { 3074 if (garray[jo[j]] > rstart) break; 3075 jj[cnt++] = garray[jo[j]]; 3076 CHKMEMQ; 3077 } 3078 for (k=id[i]; k<id[i+1]; k++) { 3079 if (jd[k] != i) { 3080 jj[cnt++] = rstart + jd[k]; 3081 CHKMEMQ; 3082 } 3083 } 3084 for (;j<io[i+1]; j++) { 3085 jj[cnt++] = garray[jo[j]]; 3086 CHKMEMQ; 3087 } 3088 } 3089 ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr); 3090 PetscFunctionReturn(0); 3091 } 3092 EXTERN_C_END 3093 3094 EXTERN_C_BEGIN 3095 #if defined(PETSC_HAVE_MUMPS) 3096 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3097 #endif 3098 EXTERN_C_END 3099 3100 /*MC 3101 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 3102 3103 Options Database Keys: 3104 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 3105 . 
EXTERN_C_BEGIN
#if defined(PETSC_HAVE_MUMPS)
extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
#endif
EXTERN_C_END

/*MC
   MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.

   Options Database Keys:
+ -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
. -mat_block_size <bs> - set the blocksize used to store the matrix
- -mat_use_hash_table <fact> - use a hash table to save memory during assembly, with table size factor <fact>

  Level: beginner

.seealso: MatCreateMPIBAIJ()
M*/
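/*
   Usage sketch (editor's illustration): selecting this type at runtime through
   the options database rather than hard-coding it.

.vb
     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
     MatSetFromOptions(A);    then run with: -mat_type mpibaij -mat_block_size 3
.ve
*/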
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ    *b;
  PetscErrorCode ierr;
  PetscTruth     flg;

  PetscFunctionBegin;
  ierr    = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;

  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->mapping   = 0;
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);

  /* build cache for off-processor entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = PETSC_NULL;
  b->garray      = PETSC_NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix-vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C","MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C","MatConvert_MPIBAIJ_MPIAdj",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C","MatStoreValues_MPIBAIJ",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C","MatRetrieveValues_MPIBAIJ",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C","MatGetDiagonalBlock_MPIBAIJ",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C","MatMPIBAIJSetPreallocation_MPIBAIJ",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C","MatMPIBAIJSetPreallocationCSR_MPIBAIJ",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C","MatDiagonalScaleLocal_MPIBAIJ",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C","MatSetHashTableFactor_MPIBAIJ",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.

   This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
   and MATMPIBAIJ otherwise.

   Options Database Keys:
. -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIBAIJ(), MATSEQBAIJ, MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_BAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocation"
/*@C
   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  bs - size of block
.  d_nz - number of block nonzeros per block row in diagonal portion of local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   Options Database Keys:
+  -mat_block_size - size of the blocks to use
-  -mat_use_hash_table <fact> - use a hash table during assembly, with table size factor <fact>

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
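   Example (an illustrative sketch; the numbers are hypothetical): a matrix with
   3x3 blocks, at most 5 block nonzeros per block row in the diagonal part and at
   most 2 in the off-diagonal part, could be preallocated as
.vb
     MatMPIBAIJSetPreallocation(A,3,5,PETSC_NULL,2,PETSC_NULL);
.ve
   or, when the per-row counts are known, with d_nnz/o_nnz arrays of length m/bs.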
   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix.  In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  | o o o d d d o o o o o o
   row 4  | o o o d d d o o o o o o
   row 5  | o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs, nz_allocated, nz_used, nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of nonzero blocks per block row in diagonal portion of local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzero blocks per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+  -mat_block_size - size of the blocks to use
-  -mat_use_hash_table <fact> - use a hash table during assembly, with table size factor <fact>

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   A nonzero block is any block that has 1 or more nonzeros in it.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix.  In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  | o o o d d d o o o o o o
   row 4  | o o o d d d o o o o o o
   row 5  | o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.
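   For example (an illustrative sketch; the sizes are hypothetical), a matrix
   with 2x2 blocks and 10 block rows (20 point rows) per process could be
   created with
.vb
     MatCreateMPIBAIJ(PETSC_COMM_WORLD,2,20,20,PETSC_DETERMINE,PETSC_DETERMINE,
                      5,PETSC_NULL,2,PETSC_NULL,&A);
.ve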
   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIBAIJ"
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a = (Mat_MPIBAIJ*)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
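/*
   Usage sketch (editor's illustration): MatLoad_MPIBAIJ() below is reached
   through MatLoad() on a matrix of type MATMPIBAIJ; the file name here is
   hypothetical.

.vb
     PetscViewer viewer;
     Mat         A;
     PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetType(A,MATMPIBAIJ);
     MatLoad(A,viewer);        run with -matload_block_size <bs> to pick the blocksize
     PetscViewerDestroy(viewer);
.ve
*/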

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIBAIJ"
PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  int            fd;
  PetscInt       i,nz,j,rstart,rend;
  PetscScalar    *vals,*buf;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
  PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
  PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
  PetscMPIInt    tag = ((PetscObject)viewer)->tag;
  PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
  PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;

  PetscFunctionBegin;
  ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
  if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
  if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;

  /* If global sizes are set, check that they are consistent with those given in the file */
  if (sizesset) {
    ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
  }
  if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
  if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);

  if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*Mbs;
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows && !rank) {
    ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
    mbs = Mbs/size + ((Mbs % size) > rank);
    m   = mbs*bs;
  } else { /* User set */
    m   = newmat->rmap->n;
    mbs = m/bs;
  }
  ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* process 0 needs enough room for the process with the most rows; scan rowners[1..size] */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<=size; i++) {
      mmax = PetscMax(mmax,rowners[i]);
    }
    mmax *= bs;
  } else mmax = m;

  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
  for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    mend = m;
    if (size == 1) mend = mend - extra_rows;
    ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
    for (j=mend; j<m; j++) locrowlens[j] = 1;
    ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += locrowlens[j];
    }
    for (i=1; i<size; i++) {
      mend = browners[i+1] - browners[i];
      if (i == size-1) mend = mend - extra_rows;
      ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
      for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<browners[i+1]-browners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = procsnz[0];
    for (i=1; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1) for (i=0; i<extra_rows; i++) mycols[nz+i] = M+i;

    /* read in everyone else's part (except the last proc) and ship it off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += locrowlens[i];
    }
    ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
  ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
  ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs;
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }

  if (!sizesset) {
    ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
  }
  ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in other processors' parts (except the last one) and ship them out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
  ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
  ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrix hash table.

   Input Parameters:
+  mat - the matrix
-  fact - factor

   Not Collective; each process can use a different factor

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table <fact>

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscErrorCode ierr,(*f)(Mat,PetscReal);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,fact);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  baij          = (Mat_MPIBAIJ*)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}
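/*
   Usage sketch (editor's illustration): accessing the two sequential pieces of
   an assembled MPIBAIJ matrix; Ad is the diagonal block, Ao the off-diagonal
   block, and colmap maps Ao's block columns back to global block columns.

.vb
     Mat      Ad,Ao;
     PetscInt *colmap;
     MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&colmap);
     ...                   read-only access; do not destroy Ad or Ao
.ve
*/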

/*
    Special version for direct calls from Fortran (to eliminate two function call overheads)
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
#endif

#undef __FUNCT__
#define __FUNCT__ "matmpibaijsetvaluesblocked_"
/*@C
   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()

   Collective on Mat

   Input Parameters:
+  mat - the matrix
.  min - number of input rows
.  im - input rows
.  nin - number of input columns
.  in - input columns
.  v - numerical values input
-  addvin - INSERT_VALUES or ADD_VALUES

   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ(), which is terrible code un-reuse.

   Level: advanced

.seealso: MatSetValuesBlocked()
@*/
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
{
  /* convert input arguments to C version */
  Mat        mat  = *matin;
  PetscInt   m    = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray = baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
#endif
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);

  if (!barray) {
    ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIBAIJWithArrays"
/*@
   MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows
   in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size; only a block size of 1 is supported
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.  (or PETSC_DECIDE to have
       calculated if N is given)  For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine.  Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and the entries of i are offsets into the local j array.
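   Example (an illustrative sketch with bs=1 and two local rows; the arrays are hypothetical):
.vb
     PetscInt    i[] = {0,2,3};        row 0 has 2 entries, row 1 has 1 entry
     PetscInt    j[] = {0,1,1};        their global column indices
     PetscScalar a[] = {1.0,2.0,3.0};
     MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&mat);
.ve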

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}