1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "petscblaslapack.h" 5 6 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 7 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat); 8 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt); 9 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]); 10 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 11 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 12 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 13 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 14 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 15 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar); 16 17 #undef __FUNCT__ 18 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ" 19 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 20 { 21 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 22 PetscErrorCode ierr; 23 PetscInt i,*idxb = 0; 24 PetscScalar *va,*vb; 25 Vec vtmp; 26 27 PetscFunctionBegin; 28 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 29 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 30 if (idx) { 31 for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;} 32 } 33 34 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 35 if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);} 36 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 37 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 38 39 for (i=0; i<A->rmap->n; i++){ 40 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = 
A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);} 41 } 42 43 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 44 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 45 if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);} 46 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 47 PetscFunctionReturn(0); 48 } 49 50 EXTERN_C_BEGIN 51 #undef __FUNCT__ 52 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 53 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat) 54 { 55 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 56 PetscErrorCode ierr; 57 58 PetscFunctionBegin; 59 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 60 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 61 PetscFunctionReturn(0); 62 } 63 EXTERN_C_END 64 65 EXTERN_C_BEGIN 66 #undef __FUNCT__ 67 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 68 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat) 69 { 70 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 75 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 76 PetscFunctionReturn(0); 77 } 78 EXTERN_C_END 79 80 /* 81 Local utility routine that creates a mapping from the global column 82 number to the local number in the off-diagonal part of the local 83 storage of the matrix. This is done in a non scalable way since the 84 length of colmap equals the global matrix length. 
85 */ 86 #undef __FUNCT__ 87 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 88 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 89 { 90 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 91 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 92 PetscErrorCode ierr; 93 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 94 95 PetscFunctionBegin; 96 #if defined (PETSC_USE_CTABLE) 97 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 98 for (i=0; i<nbs; i++){ 99 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 100 } 101 #else 102 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 103 ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 104 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 105 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 106 #endif 107 PetscFunctionReturn(0); 108 } 109 110 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 111 { \ 112 \ 113 brow = row/bs; \ 114 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 115 rmax = aimax[brow]; nrow = ailen[brow]; \ 116 bcol = col/bs; \ 117 ridx = row % bs; cidx = col % bs; \ 118 low = 0; high = nrow; \ 119 while (high-low > 3) { \ 120 t = (low+high)/2; \ 121 if (rp[t] > bcol) high = t; \ 122 else low = t; \ 123 } \ 124 for (_i=low; _i<high; _i++) { \ 125 if (rp[_i] > bcol) break; \ 126 if (rp[_i] == bcol) { \ 127 bap = ap + bs2*_i + bs*cidx + ridx; \ 128 if (addv == ADD_VALUES) *bap += value; \ 129 else *bap = value; \ 130 goto a_noinsert; \ 131 } \ 132 } \ 133 if (a->nonew == 1) goto a_noinsert; \ 134 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 135 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 136 N = nrow++ - 1; \ 137 /* shift up all the later entries in this row */ \ 138 for (ii=N; ii>=_i; ii--) { \ 139 rp[ii+1] = rp[ii]; \ 140 ierr = 
PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 141 } \ 142 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 143 rp[_i] = bcol; \ 144 ap[bs2*_i + bs*cidx + ridx] = value; \ 145 a_noinsert:; \ 146 ailen[brow] = nrow; \ 147 } 148 149 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 150 { \ 151 brow = row/bs; \ 152 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 153 rmax = bimax[brow]; nrow = bilen[brow]; \ 154 bcol = col/bs; \ 155 ridx = row % bs; cidx = col % bs; \ 156 low = 0; high = nrow; \ 157 while (high-low > 3) { \ 158 t = (low+high)/2; \ 159 if (rp[t] > bcol) high = t; \ 160 else low = t; \ 161 } \ 162 for (_i=low; _i<high; _i++) { \ 163 if (rp[_i] > bcol) break; \ 164 if (rp[_i] == bcol) { \ 165 bap = ap + bs2*_i + bs*cidx + ridx; \ 166 if (addv == ADD_VALUES) *bap += value; \ 167 else *bap = value; \ 168 goto b_noinsert; \ 169 } \ 170 } \ 171 if (b->nonew == 1) goto b_noinsert; \ 172 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 173 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 174 CHKMEMQ;\ 175 N = nrow++ - 1; \ 176 /* shift up all the later entries in this row */ \ 177 for (ii=N; ii>=_i; ii--) { \ 178 rp[ii+1] = rp[ii]; \ 179 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 180 } \ 181 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 182 rp[_i] = bcol; \ 183 ap[bs2*_i + bs*cidx + ridx] = value; \ 184 b_noinsert:; \ 185 bilen[brow] = nrow; \ 186 } 187 188 #undef __FUNCT__ 189 #define __FUNCT__ "MatSetValues_MPIBAIJ" 190 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 191 { 192 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 193 MatScalar value; 194 PetscBool roworiented = 
baij->roworiented; 195 PetscErrorCode ierr; 196 PetscInt i,j,row,col; 197 PetscInt rstart_orig=mat->rmap->rstart; 198 PetscInt rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart; 199 PetscInt cend_orig=mat->cmap->rend,bs=mat->rmap->bs; 200 201 /* Some Variables required in the macro */ 202 Mat A = baij->A; 203 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 204 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 205 MatScalar *aa=a->a; 206 207 Mat B = baij->B; 208 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 209 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 210 MatScalar *ba=b->a; 211 212 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 213 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 214 MatScalar *ap,*bap; 215 216 PetscFunctionBegin; 217 if (v) PetscValidScalarPointer(v,6); 218 for (i=0; i<m; i++) { 219 if (im[i] < 0) continue; 220 #if defined(PETSC_USE_DEBUG) 221 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 222 #endif 223 if (im[i] >= rstart_orig && im[i] < rend_orig) { 224 row = im[i] - rstart_orig; 225 for (j=0; j<n; j++) { 226 if (in[j] >= cstart_orig && in[j] < cend_orig){ 227 col = in[j] - cstart_orig; 228 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 229 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 230 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 231 } else if (in[j] < 0) continue; 232 #if defined(PETSC_USE_DEBUG) 233 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 234 #endif 235 else { 236 if (mat->was_assembled) { 237 if (!baij->colmap) { 238 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 239 } 240 #if defined (PETSC_USE_CTABLE) 241 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 242 col = col - 1; 243 #else 244 col = baij->colmap[in[j]/bs] - 1; 245 #endif 246 if (col < 0 && 
!((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 247 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 248 col = in[j]; 249 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 250 B = baij->B; 251 b = (Mat_SeqBAIJ*)(B)->data; 252 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 253 ba=b->a; 254 } else col += in[j]%bs; 255 } else col = in[j]; 256 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 257 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 258 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 259 } 260 } 261 } else { 262 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 263 if (!baij->donotstash) { 264 if (roworiented) { 265 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 266 } else { 267 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 268 } 269 } 270 } 271 } 272 PetscFunctionReturn(0); 273 } 274 275 #undef __FUNCT__ 276 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 277 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 278 { 279 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 280 const PetscScalar *value; 281 MatScalar *barray=baij->barray; 282 PetscBool roworiented = baij->roworiented; 283 PetscErrorCode ierr; 284 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 285 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 286 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 287 288 PetscFunctionBegin; 289 if(!barray) { 290 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 291 baij->barray = barray; 292 } 293 294 if (roworiented) { 295 stepval = (n-1)*bs; 296 } else { 297 stepval = (m-1)*bs; 298 } 299 for (i=0; i<m; i++) { 300 if (im[i] < 0) continue; 301 #if 
defined(PETSC_USE_DEBUG) 302 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 303 #endif 304 if (im[i] >= rstart && im[i] < rend) { 305 row = im[i] - rstart; 306 for (j=0; j<n; j++) { 307 /* If NumCol = 1 then a copy is not required */ 308 if ((roworiented) && (n == 1)) { 309 barray = (MatScalar*)v + i*bs2; 310 } else if((!roworiented) && (m == 1)) { 311 barray = (MatScalar*)v + j*bs2; 312 } else { /* Here a copy is required */ 313 if (roworiented) { 314 value = v + (i*(stepval+bs) + j)*bs; 315 } else { 316 value = v + (j*(stepval+bs) + i)*bs; 317 } 318 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 319 for (jj=0; jj<bs; jj++) { 320 barray[jj] = value[jj]; 321 } 322 barray += bs; 323 } 324 barray -= bs2; 325 } 326 327 if (in[j] >= cstart && in[j] < cend){ 328 col = in[j] - cstart; 329 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 330 } 331 else if (in[j] < 0) continue; 332 #if defined(PETSC_USE_DEBUG) 333 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 334 #endif 335 else { 336 if (mat->was_assembled) { 337 if (!baij->colmap) { 338 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 339 } 340 341 #if defined(PETSC_USE_DEBUG) 342 #if defined (PETSC_USE_CTABLE) 343 { PetscInt data; 344 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 345 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 346 } 347 #else 348 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 349 #endif 350 #endif 351 #if defined (PETSC_USE_CTABLE) 352 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 353 col = (col - 1)/bs; 354 #else 355 col = (baij->colmap[in[j]] - 1)/bs; 356 #endif 357 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 358 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 359 
col = in[j]; 360 } 361 } 362 else col = in[j]; 363 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 364 } 365 } 366 } else { 367 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 368 if (!baij->donotstash) { 369 if (roworiented) { 370 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 371 } else { 372 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 373 } 374 } 375 } 376 } 377 PetscFunctionReturn(0); 378 } 379 380 #define HASH_KEY 0.6180339887 381 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 382 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 383 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 384 #undef __FUNCT__ 385 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 386 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 387 { 388 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 389 PetscBool roworiented = baij->roworiented; 390 PetscErrorCode ierr; 391 PetscInt i,j,row,col; 392 PetscInt rstart_orig=mat->rmap->rstart; 393 PetscInt rend_orig=mat->rmap->rend,Nbs=baij->Nbs; 394 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 395 PetscReal tmp; 396 MatScalar **HD = baij->hd,value; 397 #if defined(PETSC_USE_DEBUG) 398 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 399 #endif 400 401 PetscFunctionBegin; 402 if (v) PetscValidScalarPointer(v,6); 403 for (i=0; i<m; i++) { 404 #if defined(PETSC_USE_DEBUG) 405 if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 406 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max 
%D",im[i],mat->rmap->N-1); 407 #endif 408 row = im[i]; 409 if (row >= rstart_orig && row < rend_orig) { 410 for (j=0; j<n; j++) { 411 col = in[j]; 412 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 413 /* Look up PetscInto the Hash Table */ 414 key = (row/bs)*Nbs+(col/bs)+1; 415 h1 = HASH(size,key,tmp); 416 417 418 idx = h1; 419 #if defined(PETSC_USE_DEBUG) 420 insert_ct++; 421 total_ct++; 422 if (HT[idx] != key) { 423 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 424 if (idx == size) { 425 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 426 if (idx == h1) { 427 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 428 } 429 } 430 } 431 #else 432 if (HT[idx] != key) { 433 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 434 if (idx == size) { 435 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 436 if (idx == h1) { 437 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 438 } 439 } 440 } 441 #endif 442 /* A HASH table entry is found, so insert the values at the correct address */ 443 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 444 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 445 } 446 } else { 447 if (!baij->donotstash) { 448 if (roworiented) { 449 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 450 } else { 451 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 452 } 453 } 454 } 455 } 456 #if defined(PETSC_USE_DEBUG) 457 baij->ht_total_ct = total_ct; 458 baij->ht_insert_ct = insert_ct; 459 #endif 460 PetscFunctionReturn(0); 461 } 462 463 #undef __FUNCT__ 464 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 465 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 466 { 467 Mat_MPIBAIJ *baij = 
(Mat_MPIBAIJ*)mat->data; 468 PetscBool roworiented = baij->roworiented; 469 PetscErrorCode ierr; 470 PetscInt i,j,ii,jj,row,col; 471 PetscInt rstart=baij->rstartbs; 472 PetscInt rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 473 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 474 PetscReal tmp; 475 MatScalar **HD = baij->hd,*baij_a; 476 const PetscScalar *v_t,*value; 477 #if defined(PETSC_USE_DEBUG) 478 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 479 #endif 480 481 PetscFunctionBegin; 482 483 if (roworiented) { 484 stepval = (n-1)*bs; 485 } else { 486 stepval = (m-1)*bs; 487 } 488 for (i=0; i<m; i++) { 489 #if defined(PETSC_USE_DEBUG) 490 if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 491 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 492 #endif 493 row = im[i]; 494 v_t = v + i*nbs2; 495 if (row >= rstart && row < rend) { 496 for (j=0; j<n; j++) { 497 col = in[j]; 498 499 /* Look up into the Hash Table */ 500 key = row*Nbs+col+1; 501 h1 = HASH(size,key,tmp); 502 503 idx = h1; 504 #if defined(PETSC_USE_DEBUG) 505 total_ct++; 506 insert_ct++; 507 if (HT[idx] != key) { 508 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 509 if (idx == size) { 510 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 511 if (idx == h1) { 512 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 513 } 514 } 515 } 516 #else 517 if (HT[idx] != key) { 518 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 519 if (idx == size) { 520 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 521 if (idx == h1) { 522 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 523 } 524 } 525 } 526 #endif 527 baij_a = HD[idx]; 528 if (roworiented) { 529 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 530 /* value = 
v + (i*(stepval+bs)+j)*bs; */ 531 value = v_t; 532 v_t += bs; 533 if (addv == ADD_VALUES) { 534 for (ii=0; ii<bs; ii++,value+=stepval) { 535 for (jj=ii; jj<bs2; jj+=bs) { 536 baij_a[jj] += *value++; 537 } 538 } 539 } else { 540 for (ii=0; ii<bs; ii++,value+=stepval) { 541 for (jj=ii; jj<bs2; jj+=bs) { 542 baij_a[jj] = *value++; 543 } 544 } 545 } 546 } else { 547 value = v + j*(stepval+bs)*bs + i*bs; 548 if (addv == ADD_VALUES) { 549 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 550 for (jj=0; jj<bs; jj++) { 551 baij_a[jj] += *value++; 552 } 553 } 554 } else { 555 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 556 for (jj=0; jj<bs; jj++) { 557 baij_a[jj] = *value++; 558 } 559 } 560 } 561 } 562 } 563 } else { 564 if (!baij->donotstash) { 565 if (roworiented) { 566 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 567 } else { 568 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 569 } 570 } 571 } 572 } 573 #if defined(PETSC_USE_DEBUG) 574 baij->ht_total_ct = total_ct; 575 baij->ht_insert_ct = insert_ct; 576 #endif 577 PetscFunctionReturn(0); 578 } 579 580 #undef __FUNCT__ 581 #define __FUNCT__ "MatGetValues_MPIBAIJ" 582 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 583 { 584 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 585 PetscErrorCode ierr; 586 PetscInt bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 587 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 588 589 PetscFunctionBegin; 590 for (i=0; i<m; i++) { 591 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 592 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 593 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 594 row = idxm[i] - bsrstart; 595 for 
(j=0; j<n; j++) { 596 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 597 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 598 if (idxn[j] >= bscstart && idxn[j] < bscend){ 599 col = idxn[j] - bscstart; 600 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 601 } else { 602 if (!baij->colmap) { 603 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 604 } 605 #if defined (PETSC_USE_CTABLE) 606 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 607 data --; 608 #else 609 data = baij->colmap[idxn[j]/bs]-1; 610 #endif 611 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 612 else { 613 col = data + idxn[j]%bs; 614 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 615 } 616 } 617 } 618 } else { 619 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 620 } 621 } 622 PetscFunctionReturn(0); 623 } 624 625 #undef __FUNCT__ 626 #define __FUNCT__ "MatNorm_MPIBAIJ" 627 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 628 { 629 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 630 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 631 PetscErrorCode ierr; 632 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 633 PetscReal sum = 0.0; 634 MatScalar *v; 635 636 PetscFunctionBegin; 637 if (baij->size == 1) { 638 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 639 } else { 640 if (type == NORM_FROBENIUS) { 641 v = amat->a; 642 nz = amat->nz*bs2; 643 for (i=0; i<nz; i++) { 644 #if defined(PETSC_USE_COMPLEX) 645 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 646 #else 647 sum += (*v)*(*v); v++; 648 #endif 649 } 650 v = bmat->a; 651 nz = bmat->nz*bs2; 652 for (i=0; i<nz; i++) { 653 #if defined(PETSC_USE_COMPLEX) 654 sum += 
PetscRealPart(PetscConj(*v)*(*v)); v++; 655 #else 656 sum += (*v)*(*v); v++; 657 #endif 658 } 659 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 660 *nrm = sqrt(*nrm); 661 } else if (type == NORM_1) { /* max column sum */ 662 PetscReal *tmp,*tmp2; 663 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 664 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 665 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 666 v = amat->a; jj = amat->j; 667 for (i=0; i<amat->nz; i++) { 668 for (j=0; j<bs; j++){ 669 col = bs*(cstart + *jj) + j; /* column index */ 670 for (row=0; row<bs; row++){ 671 tmp[col] += PetscAbsScalar(*v); v++; 672 } 673 } 674 jj++; 675 } 676 v = bmat->a; jj = bmat->j; 677 for (i=0; i<bmat->nz; i++) { 678 for (j=0; j<bs; j++){ 679 col = bs*garray[*jj] + j; 680 for (row=0; row<bs; row++){ 681 tmp[col] += PetscAbsScalar(*v); v++; 682 } 683 } 684 jj++; 685 } 686 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 687 *nrm = 0.0; 688 for (j=0; j<mat->cmap->N; j++) { 689 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 690 } 691 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 692 } else if (type == NORM_INFINITY) { /* max row sum */ 693 PetscReal *sums; 694 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr); 695 sum = 0.0; 696 for (j=0; j<amat->mbs; j++) { 697 for (row=0; row<bs; row++) sums[row] = 0.0; 698 v = amat->a + bs2*amat->i[j]; 699 nz = amat->i[j+1]-amat->i[j]; 700 for (i=0; i<nz; i++) { 701 for (col=0; col<bs; col++){ 702 for (row=0; row<bs; row++){ 703 sums[row] += PetscAbsScalar(*v); v++; 704 } 705 } 706 } 707 v = bmat->a + bs2*bmat->i[j]; 708 nz = bmat->i[j+1]-bmat->i[j]; 709 for (i=0; i<nz; i++) { 710 for (col=0; col<bs; col++){ 711 for (row=0; row<bs; row++){ 712 sums[row] += PetscAbsScalar(*v); v++; 713 } 714 } 715 } 716 for (row=0; row<bs; row++){ 717 if (sums[row] > sum) sum = sums[row]; 718 
} 719 } 720 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 721 ierr = PetscFree(sums);CHKERRQ(ierr); 722 } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet"); 723 } 724 PetscFunctionReturn(0); 725 } 726 727 /* 728 Creates the hash table, and sets the table 729 This table is created only once. 730 If new entried need to be added to the matrix 731 then the hash table has to be destroyed and 732 recreated. 733 */ 734 #undef __FUNCT__ 735 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 736 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 737 { 738 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 739 Mat A = baij->A,B=baij->B; 740 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 741 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 742 PetscErrorCode ierr; 743 PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 744 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 745 PetscInt *HT,key; 746 MatScalar **HD; 747 PetscReal tmp; 748 #if defined(PETSC_USE_INFO) 749 PetscInt ct=0,max=0; 750 #endif 751 752 PetscFunctionBegin; 753 if (baij->ht) PetscFunctionReturn(0); 754 755 baij->ht_size = (PetscInt)(factor*nz); 756 ht_size = baij->ht_size; 757 758 /* Allocate Memory for Hash Table */ 759 ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr); 760 ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr); 761 ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr); 762 HD = baij->hd; 763 HT = baij->ht; 764 765 /* Loop Over A */ 766 for (i=0; i<a->mbs; i++) { 767 for (j=ai[i]; j<ai[i+1]; j++) { 768 row = i+rstart; 769 col = aj[j]+cstart; 770 771 key = row*Nbs + col + 1; 772 h1 = HASH(ht_size,key,tmp); 773 for (k=0; k<ht_size; k++){ 774 if (!HT[(h1+k)%ht_size]) { 775 HT[(h1+k)%ht_size] = key; 776 HD[(h1+k)%ht_size] = a->a + j*bs2; 777 break; 778 #if 
defined(PETSC_USE_INFO) 779 } else { 780 ct++; 781 #endif 782 } 783 } 784 #if defined(PETSC_USE_INFO) 785 if (k> max) max = k; 786 #endif 787 } 788 } 789 /* Loop Over B */ 790 for (i=0; i<b->mbs; i++) { 791 for (j=bi[i]; j<bi[i+1]; j++) { 792 row = i+rstart; 793 col = garray[bj[j]]; 794 key = row*Nbs + col + 1; 795 h1 = HASH(ht_size,key,tmp); 796 for (k=0; k<ht_size; k++){ 797 if (!HT[(h1+k)%ht_size]) { 798 HT[(h1+k)%ht_size] = key; 799 HD[(h1+k)%ht_size] = b->a + j*bs2; 800 break; 801 #if defined(PETSC_USE_INFO) 802 } else { 803 ct++; 804 #endif 805 } 806 } 807 #if defined(PETSC_USE_INFO) 808 if (k> max) max = k; 809 #endif 810 } 811 } 812 813 /* Print Summary */ 814 #if defined(PETSC_USE_INFO) 815 for (i=0,j=0; i<ht_size; i++) { 816 if (HT[i]) {j++;} 817 } 818 ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 819 #endif 820 PetscFunctionReturn(0); 821 } 822 823 #undef __FUNCT__ 824 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 825 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 826 { 827 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 828 PetscErrorCode ierr; 829 PetscInt nstash,reallocs; 830 InsertMode addv; 831 832 PetscFunctionBegin; 833 if (baij->donotstash || mat->nooffprocentries) { 834 PetscFunctionReturn(0); 835 } 836 837 /* make sure all processors are either in INSERTMODE or ADDMODE */ 838 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 839 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 840 mat->insertmode = addv; /* in case this processor had no cache */ 841 842 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 843 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 844 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 845 ierr = 
PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
/*
   Completes assembly: drains the scalar and block stashes (values destined for
   this process that were set on other processes), assembles the diagonal (A)
   and off-diagonal (B) sequential blocks, and handles disassembly/reassembly
   when the parallel layout changed since the last assembly.
*/
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* drain the scalar stash: each message is a batch of (row,col,val) triples */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* each stashed block carries bs2 scalars, hence the val+i*bs2 stride */
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    /* MPI_PROD of was_assembled: result is true only if every rank was assembled.
       NOTE(review): PetscBool is reduced with MPI_INT here — assumes the enum has
       the size of int on this PETSc version; confirm against petscsys.h */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* lazily build the hash table the first time final assembly completes with
     ht_flag set, and switch the setvalues kernels to the hashed variants */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* release the MatGetRow() work space; it is rebuilt on demand */
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
/* Viewer for ASCII, draw, and socket viewers; gathers the matrix to rank 0 when needed. */
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr =
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank nonzero statistics, printed in rank order */
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    /* uniprocessor case: the diagonal block IS the whole matrix */
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      /* rank 0 owns every row of the gathered copy; all others own none */
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    /* rvals holds the bs consecutive global (point) row numbers of block row i */
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        /* blocks are stored column-by-column: each step inserts one block column */
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        /* garray maps local off-diagonal block columns back to global block columns */
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/
      /* NOTE(review): return value of PetscStrcpy() is not error-checked here */
      PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_Binary"
/*
   Streams the matrix to a binary viewer in point-wise (AIJ-style) format:
   header, per-(point)row lengths, column indices, then values; each section is
   gathered to rank 0 with point-to-point messages and written from there.
*/
static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,count,j,k,bs2=a->bs2,header[4],nz,rlen;
  PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
  int            fd;
  PetscScalar    *column_values;
  FILE           *file;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = bs2*(A->nz + B->nz);   /* local point-wise nonzero count */
  rlen = mat->rmap->n;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr =
PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    /* non-root ranks only contribute their nz to the reduction */
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }

  /* on rank 0, rlen is the max point-row count over all ranks, so rlen/bs
     bounds any rank's block-row count; elsewhere rlen is the local row count */
  ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
  /* compute lengths of each row */
  count = 0;
  for (i=0; i<a->mbs; i++) {
    crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }
  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
    /* expand local block-row lengths: each of the bs point rows of a block row
       has bs*(blocks in the block row) point entries */
    rlen = (range[1] - range[0])/bs;
    for (i=0; i<rlen; i++) {
      for (j=0; j<bs; j++) {
        row_lens[i*bs+j] = bs*crow_lens[i];
      }
    }
    ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* then receive each other rank's block-row lengths and expand the same way */
    for (i=1; i<size; i++) {
      rlen = (range[i+1] - range[i])/bs;
      ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      for (k=0; k<rlen; k++) {
        for (j=0; j<bs; j++) {
          row_lens[k*bs+j] = bs*crow_lens[k];
        }
      }
      ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscFree(row_lens);CHKERRQ(ierr);
  } else {
    ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(crow_lens);CHKERRQ(ierr);

  /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
     information needed to make it for each row from a block row. This does require more communication but still not more than
     the communication needed for the nonzero values */
  nzmax = nz; /* space a largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<a->mbs; i++) {
    pcnt = cnt;
    /* off-diagonal blocks whose global block column precedes the diagonal range */
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*col+l;
      }
    }
    /* then the diagonal-part blocks (shifted by cstart to global numbering) */
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
      }
    }
    /* then the remaining off-diagonal blocks past the diagonal range */
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*garray[B->j[j]]+l;
      }
    }
    /* replicate the block row's index list for each of its bs point rows */
    len = cnt - pcnt;
    for (k=1; k<bs; k++) {
      ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
      cnt += len;
    }
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the columns to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the numerical values */
  ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt = 0;
  for (i=0; i<a->mbs; i++) {
    /* rlen = point entries per point row in this block row; used as the stride
       to scatter the bs point rows of each block into their final positions */
    rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          /* blocks are stored column-major: entry (ll,l) is at bs2*j + l + bs*ll */
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    /* skip over the bs-1 point rows that were filled via the l*rlen offsets */
    cnt += (bs-1)*rlen;
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  /* record the block size in the .info companion file for MatLoad() */
  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode
MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1246 { 1247 PetscErrorCode ierr; 1248 PetscBool iascii,isdraw,issocket,isbinary; 1249 1250 PetscFunctionBegin; 1251 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1252 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1253 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1254 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1255 if (iascii || isdraw || issocket) { 1256 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1257 } else if (isbinary) { 1258 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1259 } else { 1260 SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1261 } 1262 PetscFunctionReturn(0); 1263 } 1264 1265 #undef __FUNCT__ 1266 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1267 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1268 { 1269 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1270 PetscErrorCode ierr; 1271 1272 PetscFunctionBegin; 1273 #if defined(PETSC_USE_LOG) 1274 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1275 #endif 1276 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1277 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1278 if (baij->A){ierr = MatDestroy(baij->A);CHKERRQ(ierr);} 1279 if (baij->B){ierr = MatDestroy(baij->B);CHKERRQ(ierr);} 1280 #if defined (PETSC_USE_CTABLE) 1281 if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);} 1282 #else 1283 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1284 #endif 1285 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1286 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1287 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1288 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1289 
ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1290 ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1291 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1292 ierr = PetscFree(baij);CHKERRQ(ierr); 1293 1294 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1295 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1296 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1297 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1298 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr); 1302 PetscFunctionReturn(0); 1303 } 1304 1305 #undef __FUNCT__ 1306 #define __FUNCT__ "MatMult_MPIBAIJ" 1307 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1308 { 1309 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1310 PetscErrorCode ierr; 1311 PetscInt nt; 1312 1313 PetscFunctionBegin; 1314 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1315 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1316 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1317 if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1318 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1319 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1320 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1321 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1322 
PetscFunctionReturn(0); 1323 } 1324 1325 #undef __FUNCT__ 1326 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1327 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1328 { 1329 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1330 PetscErrorCode ierr; 1331 1332 PetscFunctionBegin; 1333 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1334 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1335 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1336 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1337 PetscFunctionReturn(0); 1338 } 1339 1340 #undef __FUNCT__ 1341 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1342 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1343 { 1344 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1345 PetscErrorCode ierr; 1346 PetscBool merged; 1347 1348 PetscFunctionBegin; 1349 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1350 /* do nondiagonal part */ 1351 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1352 if (!merged) { 1353 /* send it on its way */ 1354 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1355 /* do local part */ 1356 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1357 /* receive remote parts: note this assumes the values are not actually */ 1358 /* inserted in yy until the next line */ 1359 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1360 } else { 1361 /* do local part */ 1362 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1363 /* send it on its way */ 1364 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1365 /* values actually were received in the Begin() but we need to call this nop */ 1366 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1367 } 1368 PetscFunctionReturn(0); 1369 } 1370 1371 #undef __FUNCT__ 1372 
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1373 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1374 { 1375 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1376 PetscErrorCode ierr; 1377 1378 PetscFunctionBegin; 1379 /* do nondiagonal part */ 1380 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1381 /* send it on its way */ 1382 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1383 /* do local part */ 1384 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1385 /* receive remote parts: note this assumes the values are not actually */ 1386 /* inserted in yy until the next line, which is true for my implementation*/ 1387 /* but is not perhaps always true. */ 1388 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1389 PetscFunctionReturn(0); 1390 } 1391 1392 /* 1393 This only works correctly for square matrices where the subblock A->A is the 1394 diagonal block 1395 */ 1396 #undef __FUNCT__ 1397 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1398 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1399 { 1400 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1401 PetscErrorCode ierr; 1402 1403 PetscFunctionBegin; 1404 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1405 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1406 PetscFunctionReturn(0); 1407 } 1408 1409 #undef __FUNCT__ 1410 #define __FUNCT__ "MatScale_MPIBAIJ" 1411 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1412 { 1413 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1414 PetscErrorCode ierr; 1415 1416 PetscFunctionBegin; 1417 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1418 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1419 PetscFunctionReturn(0); 1420 } 1421 1422 #undef __FUNCT__ 1423 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1424 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1425 
{
  /* Returns one locally-owned (point) row as a merged, column-sorted list built
     from the diagonal (A) and off-diagonal (B) sub-block rows. */
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart;

  /* request only what the caller asked for from the sub-block getrow's */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of leading B entries whose global block column precedes
         the diagonal range; merged order is B[0..imark), A, B[imark..) */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already computed by the values pass above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIBAIJ"
/* Releases the active-row state set by MatGetRow_MPIBAIJ (workspace is reused). */
PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIBAIJ"
/* Zeros all stored entries in both sequential sub-blocks (structure retained). */
PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIBAIJ"
/* Accumulates A+B local statistics, then reduces (MAX or SUM) across the
   communicator for the global variants of MatInfoType. */
PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIBAIJ"
/* Applies options to this level and/or forwards them to the sub-blocks. */
PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  /* nonzero-structure options are forwarded to both sub-blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr =
MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1589 break; 1590 case MAT_ROW_ORIENTED: 1591 a->roworiented = flg; 1592 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1593 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1594 break; 1595 case MAT_NEW_DIAGONALS: 1596 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1597 break; 1598 case MAT_IGNORE_OFF_PROC_ENTRIES: 1599 a->donotstash = flg; 1600 break; 1601 case MAT_USE_HASH_TABLE: 1602 a->ht_flag = flg; 1603 break; 1604 case MAT_SYMMETRIC: 1605 case MAT_STRUCTURALLY_SYMMETRIC: 1606 case MAT_HERMITIAN: 1607 case MAT_SYMMETRY_ETERNAL: 1608 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1609 break; 1610 default: 1611 SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op); 1612 } 1613 PetscFunctionReturn(0); 1614 } 1615 1616 #undef __FUNCT__ 1617 #define __FUNCT__ "MatTranspose_MPIBAIJ(" 1618 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1619 { 1620 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1621 Mat_SeqBAIJ *Aloc; 1622 Mat B; 1623 PetscErrorCode ierr; 1624 PetscInt M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1625 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1626 MatScalar *a; 1627 1628 PetscFunctionBegin; 1629 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1630 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1631 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1632 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1633 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1634 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1635 } else { 1636 B = *matout; 1637 } 1638 1639 /* copy over the A part */ 1640 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1641 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1642 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1643 1644 for (i=0; i<mbs; i++) { 1645 
rvals[0] = bs*(baij->rstartbs + i); 1646 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1647 for (j=ai[i]; j<ai[i+1]; j++) { 1648 col = (baij->cstartbs+aj[j])*bs; 1649 for (k=0; k<bs; k++) { 1650 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1651 col++; a += bs; 1652 } 1653 } 1654 } 1655 /* copy over the B part */ 1656 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1657 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1658 for (i=0; i<mbs; i++) { 1659 rvals[0] = bs*(baij->rstartbs + i); 1660 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1661 for (j=ai[i]; j<ai[i+1]; j++) { 1662 col = baij->garray[aj[j]]*bs; 1663 for (k=0; k<bs; k++) { 1664 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1665 col++; a += bs; 1666 } 1667 } 1668 } 1669 ierr = PetscFree(rvals);CHKERRQ(ierr); 1670 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1671 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1672 1673 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1674 *matout = B; 1675 } else { 1676 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1677 } 1678 PetscFunctionReturn(0); 1679 } 1680 1681 #undef __FUNCT__ 1682 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1683 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1684 { 1685 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1686 Mat a = baij->A,b = baij->B; 1687 PetscErrorCode ierr; 1688 PetscInt s1,s2,s3; 1689 1690 PetscFunctionBegin; 1691 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1692 if (rr) { 1693 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1694 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1695 /* Overlap communication with computation. 
     */
    ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left-scale the off-diagonal block; its rows are local so no communication is needed */
    ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIBAIJ"
/*
   MatZeroRows_MPIBAIJ - zeros the global rows listed in rows[] (global, unblocked
   indices) and optionally places diag on the diagonal of square matrices.

   A requested row may be owned by another process, so the routine first routes
   every index to its owning rank (point-to-point sends/receives), then zaps the
   rows it received locally with MatZeroRows_SeqBAIJ on the two local blocks.
*/
PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
  PetscInt       i,*owners = A->rmap->range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
  PetscInt       *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscBool      found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  /* nprocs[2*j] = number of rows destined for rank j, nprocs[2*j+1] = 1 iff any */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  j = 0;
  for (i=0; i<N; i++) {
    /* restart the ownership search only when the input is not ascending */
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  if (A->nooffproczerorows) {
    /* user promised every row is local: skip the collective sizing step */
    if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
    nrecvs = nsends;
    nmax   = N;
  } else {
    /* inform other processors of number of messages and max length*/
    ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  }

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  /* pack row indices grouped by destination rank (starts[] is consumed here) */
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  /* rebuild starts[] and fire one send per destination that has data */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  /* lrows[] = local row indices (global index minus this rank's first row) */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    /* square diagonal block: SeqBAIJ routine can set diag directly */
    ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    /* rectangular diagonal block: zero first, then insert diag via MatSetValues */
    ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }

  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* NOTE(review): only the diagonal block is reset; presumably the off-diagonal
     block is never factored -- confirm */
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIBAIJ"
PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool
 *flag)
{
  Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
  Mat            a,b,c,d;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  /* compare diagonal blocks first; skip the off-diagonal comparison if they differ */
  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  /* equal only if every process found its local pieces equal */
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIBAIJ"
PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    /* identical layout: copy the diagonal and off-diagonal blocks directly */
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* NOTE(review): the negated block size presumably marks it as a default that a
     user-supplied value may override -- confirm against MatMPIBAIJSetPreallocation */
  ierr = MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIBAIJ"
/* MatAXPY_MPIBAIJ - computes Y = a*X + Y */
PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqBAIJ    *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    /* identical sparsity: the sum reduces to a BLAS axpy on the raw value arrays */
    PetscScalar alpha = a;
    x = (Mat_SeqBAIJ *)xx->A->data;
    y = (Mat_SeqBAIJ *)yy->A->data;
    bnz = PetscBLASIntCast(x->nz);
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
    x = (Mat_SeqBAIJ *)xx->B->data;
    y = (Mat_SeqBAIJ *)yy->B->data;
    bnz = PetscBLASIntCast(x->nz);
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
  } else {
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetBlockSize_MPIBAIJ"
PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscInt       rbs,cbs;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
  ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
  /* the row/column layouts must already agree with the requested block size */
  ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
  ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
  if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
  if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIBAIJ"
PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS
 isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscInt       csize;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    /* the gathered column IS was stashed on the matrix by the MAT_INITIAL_MATRIX call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    /* every process needs the complete set of requested columns */
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  }
  ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    /* stash the gathered IS for later reuse; composing takes a reference, so drop ours */
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
/*
    Not great since it makes two copies of the submatrix, first an SeqBAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
*/
PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            *local,M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  Mat_SeqBAIJ    *aij;


  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    /* the sequential scratch submatrix was stashed on *newmat by the initial call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  /* work in block (not point) dimensions from here on */
  m = m/bs;
  n = n/bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n*bs) { /* square matrix */
        nlocal = m;
      } else {
        /* spread the columns as evenly as possible over the processes */
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize/bs;
    }
    /* prefix-sum the local column counts to get this rank's [rstart,rend) */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
       rather than the slower MatSetValues().
ierr = ISSetPermutation(growp);CHKERRQ(ierr); 2186 if (pcomm!=comm) { 2187 ierr = ISDestroy(crowp);CHKERRQ(ierr); 2188 } 2189 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2190 /* get the local target indices */ 2191 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 2192 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 2193 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2194 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 2195 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 2196 ierr = ISDestroy(irowp);CHKERRQ(ierr); 2197 /* the column permutation is so much easier; 2198 make a local version of 'colp' and invert it */ 2199 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2200 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2201 if (size==1) { 2202 lcolp = colp; 2203 } else { 2204 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 2205 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 2206 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr); 2207 } 2208 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2209 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 2210 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 2211 if (size>1) { 2212 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 2213 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 2214 } 2215 /* now we just get the submatrix */ 2216 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2217 /* clean up */ 2218 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 2219 ierr = ISDestroy(icolp);CHKERRQ(ierr); 2220 PetscFunctionReturn(0); 2221 } 2222 2223 #undef __FUNCT__ 2224 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2225 PetscErrorCode PETSCMAT_DLLEXPORT MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2226 { 2227 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2228 Mat_SeqBAIJ *B = 
(Mat_SeqBAIJ*)baij->B->data;

  PetscFunctionBegin;
  if (nghosts) { *nghosts = B->nbs;}
  if (ghosts) {*ghosts = baij->garray;}
  PetscFunctionReturn(0);
}

EXTERN PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
/*
    This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
    (NOTE(review): the original comment referenced this routine itself,
    which looks like a typo -- confirm the intended sibling.)
*/
PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
{
  Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode         ierr;
  PetscMPIInt            size,*ncolsonproc,*disp,nn;
  PetscInt               bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col;
  const PetscInt         *is;
  PetscInt               nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj;
  PetscInt               *rowhit,M,cstart,cend,colb;
  PetscInt               *columnsforrow,l;
  IS                     *isa;
  PetscBool              done,flg;
  ISLocalToGlobalMapping map = mat->bmapping;
  PetscInt               *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype;

  PetscFunctionBegin;
  if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
  if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");

  ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
  ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
  /* all sizes below are in block (not point) units */
  M         = mat->rmap->n/bs;
  cstart    = mat->cmap->rstart/bs;
  cend      = mat->cmap->rend/bs;
  c->M      = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
  c->N      = mat->cmap->N/bs;
  c->m      = mat->rmap->n/bs;
  c->rstart = mat->rmap->rstart/bs;

  c->ncolors = nis;
  ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
  ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
  ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
  ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
  ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);

  /* Allow access to data structures of local part of matrix */
  if (!baij->colmap) {
    ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
  }
  ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
  ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);

  ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
  ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);

  for (i=0; i<nis; i++) {
    ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
    ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
    c->ncolumns[i] = n;
    if (n) {
      ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
      ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
    } else {
      c->columns[i] = 0;
    }

    if (ctype == IS_COLORING_GLOBAL){
      /* Determine the total (parallel) number of columns of this color */
      ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
      ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);

      nn   = PetscMPIIntCast(n);
      ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
      nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];}
      if (!nctot) {
        ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
      }

      disp[0] = 0;
      for (j=1; j<size; j++) {
        disp[j] = disp[j-1] + ncolsonproc[j-1];
      }

      /* Get complete list of columns for color on each processor */
      ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
      ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
    } else if (ctype == IS_COLORING_GHOSTED){
      /* Determine local number of columns of this color on this process, including ghost points */
      nctot = n;
      ierr  = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
      ierr  = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
    }

    /*
       Mark all rows affect by these columns
    */
    /* Temporary option to allow for debugging/testing */
    flg  = PETSC_FALSE;
    ierr = PetscOptionsGetTruth(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr);
    if (!flg) {/*-----------------------------------------------------------------------------*/
      /* crude, fast version */
      ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
      /* loop over columns*/
      for (j=0; j<nctot; j++) {
        if (ctype == IS_COLORING_GHOSTED) {
          col = ltog[cols[j]];
        } else {
          col = cols[j];
        }
        if (col >= cstart && col < cend) {
          /* column is in diagonal block of matrix */
          rows = A_cj + A_ci[col-cstart];
          m    = A_ci[col-cstart+1] - A_ci[col-cstart];
        } else {
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
          colb --;
#else
          colb = baij->colmap[col] - 1;
#endif
          if (colb == -1) {
            /* this global column has no entries in the local off-diagonal block */
            m = 0;
          } else {
            colb = colb/bs;
            rows = B_cj + B_ci[colb];
            m    = B_ci[colb+1] - B_ci[colb];
          }
        }
        /* loop over columns marking them in rowhit */
        /* rowhit[row] stores col+1 so 0 can mean "not hit" */
        for (k=0; k<m; k++) {
          rowhit[*rows++] = col + 1;
        }
      }

      /* count the number of hits */
      nrows = 0;
      for (j=0; j<M; j++) {
        if (rowhit[j]) nrows++;
      }
      c->nrows[i] = nrows;
      ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
      ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
      ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
      nrows = 0;
      for (j=0; j<M; j++) {
        if (rowhit[j]) {
          c->rows[i][nrows]          = j;
          c->columnsforrow[i][nrows] = rowhit[j] - 1;
          nrows++;
        }
      }
    } else {/*-------------------------------------------------------------------------------*/
      /* slow version, using rowhit as a linked list */
      PetscInt currentcol,fm,mfm;
      rowhit[M] = M;   /* sentinel: list terminator */
      nrows     = 0;
      /* loop over columns*/
      for (j=0; j<nctot; j++) {
        if (ctype == IS_COLORING_GHOSTED) {
          col = ltog[cols[j]];
        } else {
          col = cols[j];
        }
        if (col >= cstart && col < cend) {
          /* column is in diagonal block of matrix */
          rows = A_cj + A_ci[col-cstart];
          m    = A_ci[col-cstart+1] - A_ci[col-cstart];
        } else {
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
          colb --;
#else
          colb = baij->colmap[col] - 1;
#endif
          if (colb == -1) {
            m = 0;
          } else {
            colb = colb/bs;
            rows = B_cj + B_ci[colb];
            m    = B_ci[colb+1] - B_ci[colb];
          }
        }

        /* loop over columns marking them in rowhit */
        fm = M; /* fm points to first entry in linked list */
        for (k=0; k<m; k++) {
          currentcol = *rows++;
          /* is it already in the list? */
          do {
            mfm = fm;
            fm  = rowhit[fm];
          } while (fm < currentcol);
          /* not in list so add it */
          if (fm != currentcol) {
            nrows++;
            columnsforrow[currentcol] = col;
            /* next three lines insert new entry into linked list */
            rowhit[mfm]        = currentcol;
            rowhit[currentcol] = fm;
            fm                 = currentcol;
            /* fm points to present position in list since we know the columns are sorted */
          } else {
            /* two columns of one color hit the same row: the coloring is invalid */
            SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
          }
        }
      }
      c->nrows[i] = nrows;
      ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
      ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
      ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
      /* now store the linked list of rows into c->rows[i] */
      nrows = 0;
      fm    = rowhit[M];
      do {
        c->rows[i][nrows]            = fm;
        c->columnsforrow[i][nrows++] = columnsforrow[fm];
        fm = rowhit[fm];
      } while (fm < M);
    } /* ---------------------------------------------------------------------------------------*/
    ierr = PetscFree(cols);CHKERRQ(ierr);
  }

  /* Optimize by adding the vscale, and scaleforrow[][] fields */
  /*
       vscale will contain the "diagonal" on processor scalings followed by the off processor
  */
  if (ctype == IS_COLORING_GLOBAL) {
    PetscInt *garray;
    /* expand block-level ghost indices to point-level indices for the ghosted vector */
    ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
    for (i=0; i<baij->B->cmap->n/bs; i++) {
      for (j=0; j<bs; j++) {
        garray[i*bs+j] = bs*baij->garray[i]+j;
      }
    }
    ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
    ierr = PetscFree(garray);CHKERRQ(ierr);
    CHKMEMQ;
    ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
    for (k=0; k<c->ncolors; k++) {
      ierr =
PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2479 for (l=0; l<c->nrows[k]; l++) { 2480 col = c->columnsforrow[k][l]; 2481 if (col >= cstart && col < cend) { 2482 /* column is in diagonal block of matrix */ 2483 colb = col - cstart; 2484 } else { 2485 /* column is in "off-processor" part */ 2486 #if defined (PETSC_USE_CTABLE) 2487 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2488 colb --; 2489 #else 2490 colb = baij->colmap[col] - 1; 2491 #endif 2492 colb = colb/bs; 2493 colb += cend - cstart; 2494 } 2495 c->vscaleforrow[k][l] = colb; 2496 } 2497 } 2498 } else if (ctype == IS_COLORING_GHOSTED) { 2499 /* Get gtol mapping */ 2500 PetscInt N = mat->cmap->N, *gtol; 2501 ierr = PetscMalloc((N+1)*sizeof(PetscInt),>ol);CHKERRQ(ierr); 2502 for (i=0; i<N; i++) gtol[i] = -1; 2503 for (i=0; i<map->n; i++) gtol[ltog[i]] = i; 2504 2505 c->vscale = 0; /* will be created in MatFDColoringApply() */ 2506 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2507 for (k=0; k<c->ncolors; k++) { 2508 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2509 for (l=0; l<c->nrows[k]; l++) { 2510 col = c->columnsforrow[k][l]; /* global column index */ 2511 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 2512 } 2513 } 2514 ierr = PetscFree(gtol);CHKERRQ(ierr); 2515 } 2516 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2517 2518 ierr = PetscFree(rowhit);CHKERRQ(ierr); 2519 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 2520 ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2521 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2522 CHKMEMQ; 2523 PetscFunctionReturn(0); 2524 } 2525 2526 #undef __FUNCT__ 2527 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ" 2528 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat) 2529 { 2530 Mat B; 
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
  Mat_SeqAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
  PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
  PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);

  /* ----------------------------------------------------------------
       Tell every processor the number of nonzeros per row
  */
  /* all indices below are in block (not point) units */
  ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
    lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
  }
  sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
  ierr      = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
  displs    = recvcounts + size;   /* displs shares the allocation with recvcounts */
  for (i=0; i<size; i++) {
    recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
    displs[i]     = A->rmap->range[i]/bs;
  }
#if defined(PETSC_HAVE_MPI_IN_PLACE)
  ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
#else
  ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
#endif
  /* ---------------------------------------------------------------
       Create the sequential matrix of the same type as the local block diagonal
  */
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
  b    = (Mat_SeqAIJ *)B->data;

  /*--------------------------------------------------------------------
    Copy my part of matrix column indices over
  */
  /* build this rank's rows directly into B's j array, merging the off-diagonal
     (bd, translated through garray) and diagonal (ad) parts in ascending column order */
  sendcount  = ad->nz + bd->nz;
  jsendbuf   = b->j + b->i[rstarts[rank]/bs];
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend/bs - A->rmap->rstart/bs;
  cnt        = 0;
  for (i=0; i<n; i++) {

    /* put in lower diagonal portion */
    m = bd->i[i+1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j=ad->i[i]; j<ad->i[i+1]; j++) {
      jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
    }

    /* put in upper diagonal portion */
    while (m-- > 0) {
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
    }
  }
  if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);

  /*--------------------------------------------------------------------
    Gather all column indices to all processors
  */
  for (i=0; i<size; i++) {
    recvcounts[i] = 0;
    for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
      recvcounts[i] += lens[j];
    }
  }
  displs[0] = 0;
  for (i=1; i<size; i++) {
    displs[i] = displs[i-1] + recvcounts[i-1];
  }
#if defined(PETSC_HAVE_MPI_IN_PLACE)
  ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
#else
  ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
#endif
  /*--------------------------------------------------------------------
    Assemble the matrix into useable
form (note numerical values not yet set) 2623 */ 2624 /* set the b->ilen (length of each row) values */ 2625 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2626 /* set the b->i indices */ 2627 b->i[0] = 0; 2628 for (i=1; i<=A->rmap->N/bs; i++) { 2629 b->i[i] = b->i[i-1] + lens[i-1]; 2630 } 2631 ierr = PetscFree(lens);CHKERRQ(ierr); 2632 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2633 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2634 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2635 2636 if (A->symmetric){ 2637 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2638 } else if (A->hermitian) { 2639 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2640 } else if (A->structurally_symmetric) { 2641 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2642 } 2643 *newmat = B; 2644 PetscFunctionReturn(0); 2645 } 2646 2647 #undef __FUNCT__ 2648 #define __FUNCT__ "MatSOR_MPIBAIJ" 2649 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2650 { 2651 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2652 PetscErrorCode ierr; 2653 Vec bb1 = 0; 2654 2655 PetscFunctionBegin; 2656 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2657 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2658 } 2659 2660 if (flag == SOR_APPLY_UPPER) { 2661 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2662 PetscFunctionReturn(0); 2663 } 2664 2665 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2666 if (flag & SOR_ZERO_INITIAL_GUESS) { 2667 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2668 its--; 2669 } 2670 2671 while (its--) { 2672 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2673 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2674 2675 /* 
update rhs: bb1 = bb - B*x */ 2676 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2677 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2678 2679 /* local sweep */ 2680 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2681 } 2682 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 2683 if (flag & SOR_ZERO_INITIAL_GUESS) { 2684 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2685 its--; 2686 } 2687 while (its--) { 2688 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2689 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2690 2691 /* update rhs: bb1 = bb - B*x */ 2692 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2693 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2694 2695 /* local sweep */ 2696 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2697 } 2698 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 2699 if (flag & SOR_ZERO_INITIAL_GUESS) { 2700 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2701 its--; 2702 } 2703 while (its--) { 2704 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2705 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2706 2707 /* update rhs: bb1 = bb - B*x */ 2708 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2709 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2710 2711 /* local sweep */ 2712 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2713 } 2714 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2715 2716 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 2717 PetscFunctionReturn(0); 2718 } 2719 2720 extern PetscErrorCode PETSCMAT_DLLEXPORT 
MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);


/* -------------------------------------------------------------------*/
/* Operation table for the MATMPIBAIJ type. The slot comments give the
   index into struct _MatOps; a 0 entry means the operation is not
   provided by this type. Order must match the _MatOps declaration. */
static struct _MatOps MatOps_Values = {
       MatSetValues_MPIBAIJ,
       MatGetRow_MPIBAIJ,
       MatRestoreRow_MPIBAIJ,
       MatMult_MPIBAIJ,
/* 4*/ MatMultAdd_MPIBAIJ,
       MatMultTranspose_MPIBAIJ,
       MatMultTransposeAdd_MPIBAIJ,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatSOR_MPIBAIJ,
       MatTranspose_MPIBAIJ,
/*15*/ MatGetInfo_MPIBAIJ,
       MatEqual_MPIBAIJ,
       MatGetDiagonal_MPIBAIJ,
       MatDiagonalScale_MPIBAIJ,
       MatNorm_MPIBAIJ,
/*20*/ MatAssemblyBegin_MPIBAIJ,
       MatAssemblyEnd_MPIBAIJ,
       MatSetOption_MPIBAIJ,
       MatZeroEntries_MPIBAIJ,
/*24*/ MatZeroRows_MPIBAIJ,
       0,
       0,
       0,
       0,
/*29*/ MatSetUpPreallocation_MPIBAIJ,
       0,
       0,
       0,
       0,
/*34*/ MatDuplicate_MPIBAIJ,
       0,
       0,
       0,
       0,
/*39*/ MatAXPY_MPIBAIJ,
       MatGetSubMatrices_MPIBAIJ,
       MatIncreaseOverlap_MPIBAIJ,
       MatGetValues_MPIBAIJ,
       MatCopy_MPIBAIJ,
/*44*/ 0,
       MatScale_MPIBAIJ,
       0,
       0,
       0,
/*49*/ MatSetBlockSize_MPIBAIJ,
       0,
       0,
       0,
       0,
/*54*/ MatFDColoringCreate_MPIBAIJ,
       0,
       MatSetUnfactored_MPIBAIJ,
       MatPermute_MPIBAIJ,
       MatSetValuesBlocked_MPIBAIJ,
/*59*/ MatGetSubMatrix_MPIBAIJ,
       MatDestroy_MPIBAIJ,
       MatView_MPIBAIJ,
       0,
       0,
/*64*/ 0,
       0,
       0,
       0,
       0,
/*69*/ MatGetRowMaxAbs_MPIBAIJ,
       0,
       0,
       0,
       0,
/*74*/ 0,
       MatFDColoringApply_BAIJ,
       0,
       0,
       0,
/*79*/ 0,
       0,
       0,
       0,
       MatLoad_MPIBAIJ,
/*84*/ 0,
       0,
       0,
       0,
       0,
/*89*/ 0,
       0,
       0,
       0,
       0,
/*94*/ 0,
       0,
       0,
       0,
       0,
/*99*/ 0,
       0,
       0,
       0,
       0,
/*104*/0,
       MatRealPart_MPIBAIJ,
       MatImaginaryPart_MPIBAIJ,
       0,
       0,
/*109*/0,
       0,
       0,
       0,
       0,
/*114*/MatGetSeqNonzerostructure_MPIBAIJ,
       0,
       MatGetGhosts_MPIBAIJ,
       0,
       0,
/*119*/0,
       0,
       0,
       0
};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
/* Return the local diagonal block (the sequential BAIJ matrix stored in
   a->A). No copy is made, so *iscopy is set to PETSC_FALSE and the reuse
   argument is ignored. */
PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscBool *iscopy,MatReuse reuse,Mat *a)
{
  PetscFunctionBegin;
  *a      = ((Mat_MPIBAIJ *)A->data)->A;
  *iscopy = PETSC_FALSE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
/* Preallocate and fill from block-CSR input: ii/jj are block-row pointers
   and sorted global block-column indices for the local block rows; V,
   if given, holds the bs*bs dense blocks in the same order as jj. */
PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
{
  PetscInt       m,rstart,cstart,cend;
  PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
  const PetscInt *JJ=0;
  PetscScalar    *values=0;
  PetscErrorCode ierr;

  PetscFunctionBegin;

  if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
  ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  /* all quantities below are in block (not scalar) units */
  m      = B->rmap->n/bs;
  rstart = B->rmap->rstart/bs;
  cstart = B->cmap->rstart/bs;
  cend   = B->cmap->rend/bs;

  if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
  ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz = ii[i+1] - ii[i];
    if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
    nz_max = PetscMax(nz_max,nz);
    JJ     = jj + ii[i];
    /* relies on jj being sorted per row: skip columns left of the
       diagonal block, then count columns inside [cstart,cend) as d */
    for (j=0; j<nz; j++) {
      if (*JJ >= cstart) break;
      JJ++;
    }
    d = 0;
    for (; j<nz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nz - d;
  }
  ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  values = (PetscScalar*)V;
  if (!values) {
    /* no values supplied: insert zero blocks so the nonzero pattern is built */
    ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  for (i=0; i<m; i++) {
    PetscInt          row   = i + rstart;
    PetscInt          ncols = ii[i+1] - ii[i];
    const PetscInt    *icols = jj + ii[i];
    const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
    ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
  }

  if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
/*@C
   MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
   (the default parallel PETSc format).

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.
j - the column indices for each local row (starts with zero) these must be sorted for each row
-  v - optional values in the matrix

   Level: developer

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);

  PetscFunctionBegin;
  /* dispatch to the type-specific implementation composed on B, if any;
     silently a no-op when the type does not provide one */
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
/* Type-specific preallocation: validates the block size and per-row
   counts, caches block-scaled layout data in Mat_MPIBAIJ, and creates /
   preallocates the sequential diagonal (b->A) and off-diagonal (b->B)
   blocks. */
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
{
  Mat_MPIBAIJ    *b;
  PetscErrorCode ierr;
  PetscInt       i, newbs = PetscAbs(bs);

  PetscFunctionBegin;
  if (bs < 0) {
    /* negative bs: |bs| is the default, but the command line may override it */
    ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
    bs   = PetscAbs(bs);
  }
  /* d_nnz/o_nnz were sized by the caller for the original bs; a changed
     blocksize would invalidate them */
  if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
  bs = newbs;


  if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
  if (d_nz == PETSC_DEFAULT || d_nz ==
PETSC_DECIDE) d_nz = 5; 2987 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2988 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2989 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2990 2991 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2992 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2993 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2994 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2995 2996 if (d_nnz) { 2997 for (i=0; i<B->rmap->n/bs; i++) { 2998 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2999 } 3000 } 3001 if (o_nnz) { 3002 for (i=0; i<B->rmap->n/bs; i++) { 3003 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 3004 } 3005 } 3006 3007 b = (Mat_MPIBAIJ*)B->data; 3008 b->bs2 = bs*bs; 3009 b->mbs = B->rmap->n/bs; 3010 b->nbs = B->cmap->n/bs; 3011 b->Mbs = B->rmap->N/bs; 3012 b->Nbs = B->cmap->N/bs; 3013 3014 for (i=0; i<=b->size; i++) { 3015 b->rangebs[i] = B->rmap->range[i]/bs; 3016 } 3017 b->rstartbs = B->rmap->rstart/bs; 3018 b->rendbs = B->rmap->rend/bs; 3019 b->cstartbs = B->cmap->rstart/bs; 3020 b->cendbs = B->cmap->rend/bs; 3021 3022 if (!B->preallocated) { 3023 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3024 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3025 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 3026 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3027 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3028 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3029 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 3030 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3031 ierr = 
MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 3032 } 3033 3034 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3035 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 3036 B->preallocated = PETSC_TRUE; 3037 PetscFunctionReturn(0); 3038 } 3039 EXTERN_C_END 3040 3041 EXTERN_C_BEGIN 3042 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 3043 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 3044 EXTERN_C_END 3045 3046 3047 EXTERN_C_BEGIN 3048 #undef __FUNCT__ 3049 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 3050 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj) 3051 { 3052 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 3053 PetscErrorCode ierr; 3054 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 3055 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 3056 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 3057 3058 PetscFunctionBegin; 3059 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 3060 ii[0] = 0; 3061 CHKMEMQ; 3062 for (i=0; i<M; i++) { 3063 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 3064 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 3065 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 3066 /* remove one from count of matrix has diagonal */ 3067 for (j=id[i]; j<id[i+1]; j++) { 3068 if (jd[j] == i) {ii[i+1]--;break;} 3069 } 3070 CHKMEMQ; 3071 } 3072 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3073 cnt = 0; 3074 for (i=0; i<M; i++) { 3075 for (j=io[i]; j<io[i+1]; j++) { 3076 if (garray[jo[j]] > rstart) break; 3077 jj[cnt++] = garray[jo[j]]; 3078 CHKMEMQ; 3079 } 3080 for (k=id[i]; k<id[i+1]; k++) { 3081 if 
(jd[k] != i) {
        jj[cnt++] = rstart + jd[k];
        CHKMEMQ;
      }
    }
    /* remaining off-diagonal block columns (after this process's range) */
    for (;j<io[i+1]; j++) {
      jj[cnt++] = garray[jo[j]];
      CHKMEMQ;
    }
  }
  /* MatCreateMPIAdj takes ownership of ii and jj */
  ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#if defined(PETSC_HAVE_MUMPS)
extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
#endif
EXTERN_C_END

/*MC
   MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.

   Options Database Keys:
+ -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
. -mat_block_size <bs> - set the blocksize used to store the matrix
- -mat_use_hash_table <fact>

  Level: beginner

.seealso: MatCreateMPIBAIJ
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIBAIJ"
/* Constructor for MATMPIBAIJ: allocates the Mat_MPIBAIJ context,
   installs the operation table, initializes all cached state, processes
   the hash-table options, and composes the type-specific methods. */
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ    *b;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr    = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;


  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->mapping   = 0;
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash = PETSC_FALSE;
  b->colmap     = PETSC_NULL;
  b->garray     = PETSC_NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    /* the same option name optionally carries the table-size factor as its value */
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
                                           "MatConvert_MPIBAIJ_MPIAdj",
                                           MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIBAIJ",
                                           MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIBAIJ",
                                           MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIBAIJ",
                                           MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
                                           "MatMPIBAIJSetPreallocation_MPIBAIJ",
                                           MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
                                           "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
                                           MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                           "MatDiagonalScaleLocal_MPIBAIJ",
                                           MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
                                           "MatSetHashTableFactor_MPIBAIJ",
                                           MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.

   This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
   and MATMPIBAIJ otherwise.

   Options Database Keys:
.
-mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_BAIJ"
/* Dispatch constructor for "baij": sequential BAIJ on one process,
   parallel MPIBAIJ otherwise. */
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocation"
/*@C
   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  bs - size of block
.  d_nz  - number of block nonzeros per block row in diagonal portion of local
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           of the in diagonal portion of the local (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact>

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   than it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  |  o o o d d d o o o o o o
   row 4  |  o o o d d d o o o o o o
   row 5  |  o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);

  PetscFunctionBegin;
  /* dispatch to the type-specific implementation composed on B, if any */
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzero blocks per block row in diagonal portion of local
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           of the in diagonal portion of the local (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact>

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   A nonzero block is any block that as 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   than it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  |  o o o d d d o o o o o o
   row 4  |  o o o d d d o o o o o o
   row 5  |  o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* sequential BAIJ on one process, parallel MPIBAIJ otherwise */
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIBAIJ"
/* Duplicate an MPIBAIJ matrix: copies layout and cached block metadata,
   duplicates the sequential diagonal/off-diagonal blocks per cpvalues,
   and copies the column map, garray, and communication scatter. */
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr =
MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3469 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3470 3471 mat->factortype = matin->factortype; 3472 mat->preallocated = PETSC_TRUE; 3473 mat->assembled = PETSC_TRUE; 3474 mat->insertmode = NOT_SET_VALUES; 3475 3476 a = (Mat_MPIBAIJ*)mat->data; 3477 mat->rmap->bs = matin->rmap->bs; 3478 a->bs2 = oldmat->bs2; 3479 a->mbs = oldmat->mbs; 3480 a->nbs = oldmat->nbs; 3481 a->Mbs = oldmat->Mbs; 3482 a->Nbs = oldmat->Nbs; 3483 3484 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3485 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3486 3487 a->size = oldmat->size; 3488 a->rank = oldmat->rank; 3489 a->donotstash = oldmat->donotstash; 3490 a->roworiented = oldmat->roworiented; 3491 a->rowindices = 0; 3492 a->rowvalues = 0; 3493 a->getrowactive = PETSC_FALSE; 3494 a->barray = 0; 3495 a->rstartbs = oldmat->rstartbs; 3496 a->rendbs = oldmat->rendbs; 3497 a->cstartbs = oldmat->cstartbs; 3498 a->cendbs = oldmat->cendbs; 3499 3500 /* hash table stuff */ 3501 a->ht = 0; 3502 a->hd = 0; 3503 a->ht_size = 0; 3504 a->ht_flag = oldmat->ht_flag; 3505 a->ht_fact = oldmat->ht_fact; 3506 a->ht_total_ct = 0; 3507 a->ht_insert_ct = 0; 3508 3509 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3510 if (oldmat->colmap) { 3511 #if defined (PETSC_USE_CTABLE) 3512 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3513 #else 3514 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3515 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3516 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3517 #endif 3518 } else a->colmap = 0; 3519 3520 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3521 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3522 ierr = 
PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3523 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3524 } else a->garray = 0; 3525 3526 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3527 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3528 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3529 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3530 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3531 3532 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3533 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3534 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3535 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3536 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3537 *newmat = mat; 3538 3539 PetscFunctionReturn(0); 3540 } 3541 3542 #undef __FUNCT__ 3543 #define __FUNCT__ "MatLoad_MPIBAIJ" 3544 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3545 { 3546 PetscErrorCode ierr; 3547 int fd; 3548 PetscInt i,nz,j,rstart,rend; 3549 PetscScalar *vals,*buf; 3550 MPI_Comm comm = ((PetscObject)viewer)->comm; 3551 MPI_Status status; 3552 PetscMPIInt rank,size,maxnz; 3553 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3554 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 3555 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3556 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3557 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 3558 PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 3559 3560 PetscFunctionBegin; 3561 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3562 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the 
matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3563 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3564 3565 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3566 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3567 if (!rank) { 3568 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3569 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3570 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3571 } 3572 3573 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 3574 3575 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3576 M = header[1]; N = header[2]; 3577 3578 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3579 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 3580 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 3581 3582 /* If global sizes are set, check if they are consistent with that given in the file */ 3583 if (sizesset) { 3584 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 3585 } 3586 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3587 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3588 3589 if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices"); 3590 3591 /* 3592 This code adds extra rows to make sure the number of rows is 3593 divisible by the blocksize 3594 */ 3595 Mbs = M/bs; 3596 extra_rows = bs - M + bs*Mbs; 3597 if (extra_rows == bs) extra_rows = 0; 3598 else Mbs++; 3599 if (extra_rows && !rank) { 3600 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3601 } 3602 3603 /* determine ownership of all rows 
*/ 3604 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3605 mbs = Mbs/size + ((Mbs % size) > rank); 3606 m = mbs*bs; 3607 } else { /* User set */ 3608 m = newmat->rmap->n; 3609 mbs = m/bs; 3610 } 3611 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 3612 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3613 3614 /* process 0 needs enough room for process with most rows */ 3615 if (!rank) { 3616 mmax = rowners[1]; 3617 for (i=2; i<size; i++) { 3618 mmax = PetscMax(mmax,rowners[i]); 3619 } 3620 mmax*=bs; 3621 } else mmax = m; 3622 3623 rowners[0] = 0; 3624 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3625 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3626 rstart = rowners[rank]; 3627 rend = rowners[rank+1]; 3628 3629 /* distribute row lengths to all processors */ 3630 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 3631 if (!rank) { 3632 mend = m; 3633 if (size == 1) mend = mend - extra_rows; 3634 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3635 for (j=mend; j<m; j++) locrowlens[j] = 1; 3636 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3637 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3638 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3639 for (j=0; j<m; j++) { 3640 procsnz[0] += locrowlens[j]; 3641 } 3642 for (i=1; i<size; i++) { 3643 mend = browners[i+1] - browners[i]; 3644 if (i == size-1) mend = mend - extra_rows; 3645 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3646 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3647 /* calculate the number of nonzeros on each processor */ 3648 for (j=0; j<browners[i+1]-browners[i]; j++) { 3649 procsnz[i] += rowlengths[j]; 3650 } 3651 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3652 } 3653 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3654 } else { 3655 
ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3656 } 3657 3658 if (!rank) { 3659 /* determine max buffer needed and allocate it */ 3660 maxnz = procsnz[0]; 3661 for (i=1; i<size; i++) { 3662 maxnz = PetscMax(maxnz,procsnz[i]); 3663 } 3664 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3665 3666 /* read in my part of the matrix column indices */ 3667 nz = procsnz[0]; 3668 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3669 mycols = ibuf; 3670 if (size == 1) nz -= extra_rows; 3671 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3672 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 3673 3674 /* read in every ones (except the last) and ship off */ 3675 for (i=1; i<size-1; i++) { 3676 nz = procsnz[i]; 3677 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3678 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3679 } 3680 /* read in the stuff for the last proc */ 3681 if (size != 1) { 3682 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3683 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3684 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3685 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3686 } 3687 ierr = PetscFree(cols);CHKERRQ(ierr); 3688 } else { 3689 /* determine buffer space needed for message */ 3690 nz = 0; 3691 for (i=0; i<m; i++) { 3692 nz += locrowlens[i]; 3693 } 3694 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3695 mycols = ibuf; 3696 /* receive message of column indices*/ 3697 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3698 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3699 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3700 } 3701 3702 /* loop over local rows, determining number of off diagonal entries */ 3703 ierr = 
PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 3704 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 3705 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3706 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3707 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3708 rowcount = 0; nzcount = 0; 3709 for (i=0; i<mbs; i++) { 3710 dcount = 0; 3711 odcount = 0; 3712 for (j=0; j<bs; j++) { 3713 kmax = locrowlens[rowcount]; 3714 for (k=0; k<kmax; k++) { 3715 tmp = mycols[nzcount++]/bs; 3716 if (!mask[tmp]) { 3717 mask[tmp] = 1; 3718 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3719 else masked1[dcount++] = tmp; 3720 } 3721 } 3722 rowcount++; 3723 } 3724 3725 dlens[i] = dcount; 3726 odlens[i] = odcount; 3727 3728 /* zero out the mask elements we set */ 3729 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3730 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3731 } 3732 3733 3734 if (!sizesset) { 3735 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3736 } 3737 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3738 3739 if (!rank) { 3740 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3741 /* read in my part of the matrix numerical values */ 3742 nz = procsnz[0]; 3743 vals = buf; 3744 mycols = ibuf; 3745 if (size == 1) nz -= extra_rows; 3746 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3747 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 3748 3749 /* insert into matrix */ 3750 jj = rstart*bs; 3751 for (i=0; i<m; i++) { 3752 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3753 mycols += locrowlens[i]; 3754 vals += locrowlens[i]; 3755 jj++; 3756 } 3757 /* read in other processors (except the last one) and ship out */ 3758 for (i=1; i<size-1; i++) { 3759 nz = procsnz[i]; 3760 
vals = buf; 3761 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3762 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3763 } 3764 /* the last proc */ 3765 if (size != 1){ 3766 nz = procsnz[i] - extra_rows; 3767 vals = buf; 3768 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3769 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3770 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3771 } 3772 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3773 } else { 3774 /* receive numeric values */ 3775 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3776 3777 /* receive message of values*/ 3778 vals = buf; 3779 mycols = ibuf; 3780 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 3781 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 3782 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3783 3784 /* insert into matrix */ 3785 jj = rstart*bs; 3786 for (i=0; i<m; i++) { 3787 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3788 mycols += locrowlens[i]; 3789 vals += locrowlens[i]; 3790 jj++; 3791 } 3792 } 3793 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3794 ierr = PetscFree(buf);CHKERRQ(ierr); 3795 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3796 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3797 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3798 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3799 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3800 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3801 3802 PetscFunctionReturn(0); 3803 } 3804 3805 #undef __FUNCT__ 3806 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3807 /*@ 3808 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 
3809 3810 Input Parameters: 3811 . mat - the matrix 3812 . fact - factor 3813 3814 Not Collective, each process can use a different factor 3815 3816 Level: advanced 3817 3818 Notes: 3819 This can also be set by the command line option: -mat_use_hash_table <fact> 3820 3821 .keywords: matrix, hashtable, factor, HT 3822 3823 .seealso: MatSetOption() 3824 @*/ 3825 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3826 { 3827 PetscErrorCode ierr,(*f)(Mat,PetscReal); 3828 3829 PetscFunctionBegin; 3830 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 3831 if (f) { 3832 ierr = (*f)(mat,fact);CHKERRQ(ierr); 3833 } 3834 PetscFunctionReturn(0); 3835 } 3836 3837 EXTERN_C_BEGIN 3838 #undef __FUNCT__ 3839 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3840 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3841 { 3842 Mat_MPIBAIJ *baij; 3843 3844 PetscFunctionBegin; 3845 baij = (Mat_MPIBAIJ*)mat->data; 3846 baij->ht_fact = fact; 3847 PetscFunctionReturn(0); 3848 } 3849 EXTERN_C_END 3850 3851 #undef __FUNCT__ 3852 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3853 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3854 { 3855 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3856 PetscFunctionBegin; 3857 *Ad = a->A; 3858 *Ao = a->B; 3859 *colmap = a->garray; 3860 PetscFunctionReturn(0); 3861 } 3862 3863 /* 3864 Special version for direct calls from Fortran (to eliminate two function call overheads 3865 */ 3866 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3867 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3868 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3869 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3870 #endif 3871 3872 #undef __FUNCT__ 3873 #define __FUNCT__ "matmpibiajsetvaluesblocked" 3874 /*@C 3875 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to 
  MatSetValuesBlocked()

   Collective on Mat

   Input Parameters:
+  mat - the matrix
.  min - number of input rows
.  im - input rows
.  nin - number of input columns
.  in - input columns
.  v - numerical values input
-  addvin - INSERT_VALUES or ADD_VALUES

   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.

   Level: advanced

.seealso: MatSetValuesBlocked()
@*/
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
{
  /* convert input arguments to C version (Fortran passes everything by reference) */
  Mat             mat = *matin;
  PetscInt        m = *min, n = *nin;
  InsertMode      addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;       /* bs*bs scratch block, cached on the matrix */
  PetscBool       roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
#endif
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);

  /* lazily allocate the bs2-sized scratch block used to repack v */
  if(!barray) {
    ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* stride (in scalars) between consecutive rows of one block inside v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;    /* negative block rows are ignored by convention */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* block row is owned locally */
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -=bs2;   /* rewind to the start of the repacked block */
        }

        if (in[j] >= cstart && in[j] < cend){
          /* diagonal portion: goes into the sequential A block */
          col = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          /* off-diagonal portion: translate global block column to local via colmap */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            /* column not present in current off-diagonal structure: fall back
               to global numbering after disassembling.
               NOTE(review): the nonew flag is read from baij->A->data here
               although the insertion below targets baij->B -- confirm this
               matches MatSetValuesBlocked_MPIBAIJ. */
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      /* row owned by another process: stash for communication at assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIBAIJWithArrays"
/*@
     MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard
         CSR format the local rows.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4048 4049 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4050 4051 .keywords: matrix, aij, compressed row, sparse, parallel 4052 4053 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4054 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 4055 @*/ 4056 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4057 { 4058 PetscErrorCode ierr; 4059 4060 4061 PetscFunctionBegin; 4062 if (i[0]) { 4063 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4064 } 4065 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4066 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4067 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4068 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 4069 ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 4070 PetscFunctionReturn(0); 4071 } 4072