#include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
#include <petscblaslapack.h>
#include <petscsf.h>

extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
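/*
   Illustrative sketch (commentary, not library text): with block size bs,
   colmap maps a global block column gbcol to 1 + bs*(local block column in B),
   with 0 meaning "not present".  A lookup in MatSetValues() therefore looks
   like

       lcol = colmap[in[j]/bs] - 1;        // bs*(local block column), or -1 if absent
       if (lcol >= 0) lcol += in[j] % bs;  // point offset inside the bs x bs block

   (With PETSC_USE_CTABLE the same +1/-1 shifting is done through
   PetscTableFind() instead of a dense array.)
*/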
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++) {
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
a_noinsert:; \
    ailen[brow] = nrow; \
  }

#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
b_noinsert:; \
    bilen[brow] = nrow; \
  }
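/*
   A note on the two macros above (commentary, not library text): both do the
   same job, one on the diagonal block A and one on the off-diagonal block B.
   For a point entry (row,col) they locate block row row/bs, binary-search the
   sorted block column indices down to a window of at most 3 entries, then scan
   linearly.  On a hit, the scalar at offset bs*cidx + ridx inside the bs x bs
   block is set or added to; on a miss, a new zeroed block is inserted and the
   later blocks in the row are shifted up (reallocating first if needed).
*/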
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A     = baij->A;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   =b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
            else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray     = baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) stepval = (n-1)*bs;
  else             stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
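/*
   Commentary (not library text): the hash-table insertion path below uses
   multiplicative hashing with the golden-ratio constant.  For a table of
   ht_size slots and an integer key, HASH computes

       tmp = key*0.6180339887;
       h1  = (PetscInt)(ht_size*(tmp - (PetscInt)tmp));   // floor(ht_size * frac(key*phi))

   e.g. key = 1000, ht_size = 128: tmp = 618.0339887, frac = 0.0339887,
   h1 = (PetscInt)(128*0.0339887) = 4.  Collisions are resolved by scanning
   forward from h1 and wrapping from the end of the table back to slot 0.
*/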
#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (roworiented) stepval = (n-1)*bs;
  else             stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
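/*
   Commentary (not library text): MatGetValues below only serves rows owned by
   the calling process; requesting a remote row raises PETSC_ERR_SUP.  Columns
   in the diagonal range are read directly from baij->A, while off-diagonal
   columns are translated through colmap/garray and read from baij->B; a column
   absent from the local nonzero pattern is simply returned as 0.0.
*/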
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v    = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum  = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
/*
  Creates the hash table and fills it in.
  This table is created only once.
  If new entries need to be added to the matrix,
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
  ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flags to PETSC_FALSE, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}

#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII() */
      PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
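/*
   Commentary (not library text): the binary writer below emits, in order, a
   4-entry header {MAT_FILE_CLASSID, M, N, global nnz}, then the length of
   every point row, then all column indices, then all numerical values, with
   rank 0 collecting each piece from the other ranks under flow control.  Row
   and column data are expanded from block form to point (AIJ-style) form;
   the "-matload_block_size" hint written at the end records bs in the
   accompanying .info file.
*/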
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_Binary"
static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
  PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
  int            fd;
  PetscScalar    *column_values;
  FILE           *file;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = bs2*(A->nz + B->nz);
  rlen = mat->rmap->n;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }

  ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
  /* compute lengths of each row */
  for (i=0; i<a->mbs; i++) {
    crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }
  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
    rlen = (range[1] - range[0])/bs;
    for (i=0; i<rlen; i++) {
      for (j=0; j<bs; j++) {
        row_lens[i*bs+j] = bs*crow_lens[i];
      }
    }
    ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = (range[i+1] - range[i])/bs;
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      for (k=0; k<rlen; k++) {
        for (j=0; j<bs; j++) {
          row_lens[k*bs+j] = bs*crow_lens[k];
        }
      }
      ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
    ierr = PetscFree(row_lens);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(crow_lens);CHKERRQ(ierr);
This does require more communication but still not more than 1133 the communication needed for the nonzero values */ 1134 nzmax = nz; /* space a largest processor needs */ 1135 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1136 ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1137 cnt = 0; 1138 for (i=0; i<a->mbs; i++) { 1139 pcnt = cnt; 1140 for (j=B->i[i]; j<B->i[i+1]; j++) { 1141 if ((col = garray[B->j[j]]) > cstart) break; 1142 for (l=0; l<bs; l++) { 1143 column_indices[cnt++] = bs*col+l; 1144 } 1145 } 1146 for (k=A->i[i]; k<A->i[i+1]; k++) { 1147 for (l=0; l<bs; l++) { 1148 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1149 } 1150 } 1151 for (; j<B->i[i+1]; j++) { 1152 for (l=0; l<bs; l++) { 1153 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1154 } 1155 } 1156 len = cnt - pcnt; 1157 for (k=1; k<bs; k++) { 1158 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1159 cnt += len; 1160 } 1161 } 1162 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1163 1164 /* store the columns to the file */ 1165 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1166 if (!rank) { 1167 MPI_Status status; 1168 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1169 for (i=1; i<size; i++) { 1170 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1171 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1172 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1173 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1174 } 1175 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1176 } else { 1177 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1178 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1179 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1180 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1181 } 1182 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1183 1184 /* load up the numerical values */ 1185 ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1186 cnt = 0; 1187 for (i=0; i<a->mbs; i++) { 1188 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1189 for (j=B->i[i]; j<B->i[i+1]; j++) { 1190 if (garray[B->j[j]] > cstart) break; 1191 for (l=0; l<bs; l++) { 1192 for (ll=0; ll<bs; ll++) { 1193 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1194 } 1195 } 1196 cnt += bs; 1197 } 1198 for (k=A->i[i]; k<A->i[i+1]; k++) { 1199 for (l=0; l<bs; l++) { 1200 for (ll=0; ll<bs; ll++) { 1201 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1202 } 1203 } 1204 cnt += bs; 1205 } 1206 for (; j<B->i[i+1]; j++) { 1207 for (l=0; l<bs; l++) { 1208 for (ll=0; ll<bs; ll++) { 1209 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1210 } 1211 } 1212 cnt += bs; 1213 } 1214 cnt += (bs-1)*rlen; 1215 } 1216 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1217 1218 /* store the column values to the file */ 1219 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1220 if (!rank) { 
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
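/*
   Commentary (not library text): the multiply kernels below all follow the
   same overlap pattern,

       y  = A_diag * x_local      (computed while ghost values are in flight)
       y += B_off  * lvec         (after VecScatterEnd() delivers the ghosts)

   i.e. the scatter of the off-process entries of x into a->lvec is started
   first, the purely local product is done during the communication, and the
   off-diagonal contribution is added once the scatter completes.
*/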
PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1297 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1298 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1303 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr); 1304 PetscFunctionReturn(0); 1305 } 1306 1307 #undef __FUNCT__ 1308 #define __FUNCT__ "MatMult_MPIBAIJ" 1309 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1310 { 1311 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1312 PetscErrorCode ierr; 1313 PetscInt nt; 1314 1315 PetscFunctionBegin; 1316 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1317 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1318 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1319 if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1320 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1321 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1322 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1323 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1324 PetscFunctionReturn(0); 1325 } 1326 1327 #undef __FUNCT__ 1328 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1329 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1330 { 1331 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1332 PetscErrorCode ierr; 1333 1334 PetscFunctionBegin; 1335 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1336 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1337 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1338 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1339 PetscFunctionReturn(0); 1340 } 1341 1342 #undef __FUNCT__ 1343 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1344 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1345 { 1346 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1347 PetscErrorCode ierr; 1348 PetscBool merged; 1349 1350 PetscFunctionBegin; 1351 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1352 /* do nondiagonal part */ 1353 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1354 if (!merged) { 1355 /* send it on its way */ 1356 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1357 /* do local part */ 1358 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1359 /* receive remote parts: note this assumes the values are not actually */ 1360 /* inserted in yy until the next line */ 1361 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1362 } else { 1363 /* do local part */ 1364 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1365 /* send it on its way */ 1366 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1367 /* values actually were received in the Begin() 
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for this implementation */
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIBAIJ"
PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIBAIJ"
PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIBAIJ"
PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
  Mat            A  = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
isend[3] += info->memory; isend[4] += info->mallocs; 1549 1550 if (flag == MAT_LOCAL) { 1551 info->nz_used = isend[0]; 1552 info->nz_allocated = isend[1]; 1553 info->nz_unneeded = isend[2]; 1554 info->memory = isend[3]; 1555 info->mallocs = isend[4]; 1556 } else if (flag == MAT_GLOBAL_MAX) { 1557 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1558 1559 info->nz_used = irecv[0]; 1560 info->nz_allocated = irecv[1]; 1561 info->nz_unneeded = irecv[2]; 1562 info->memory = irecv[3]; 1563 info->mallocs = irecv[4]; 1564 } else if (flag == MAT_GLOBAL_SUM) { 1565 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1566 1567 info->nz_used = irecv[0]; 1568 info->nz_allocated = irecv[1]; 1569 info->nz_unneeded = irecv[2]; 1570 info->memory = irecv[3]; 1571 info->mallocs = irecv[4]; 1572 } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1573 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1574 info->fill_ratio_needed = 0; 1575 info->factor_mallocs = 0; 1576 PetscFunctionReturn(0); 1577 } 1578 1579 #undef __FUNCT__ 1580 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1581 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1582 { 1583 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1584 PetscErrorCode ierr; 1585 1586 PetscFunctionBegin; 1587 switch (op) { 1588 case MAT_NEW_NONZERO_LOCATIONS: 1589 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1590 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1591 case MAT_KEEP_NONZERO_PATTERN: 1592 case MAT_NEW_NONZERO_LOCATION_ERR: 1593 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1594 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1595 break; 1596 case MAT_ROW_ORIENTED: 1597 a->roworiented = flg; 1598 1599 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1600 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1601 break; 1602 case MAT_NEW_DIAGONALS: 1603 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1604 break; 1605 case MAT_IGNORE_OFF_PROC_ENTRIES: 1606 a->donotstash = flg; 1607 break; 1608 case MAT_USE_HASH_TABLE: 1609 a->ht_flag = flg; 1610 break; 1611 case MAT_SYMMETRIC: 1612 case MAT_STRUCTURALLY_SYMMETRIC: 1613 case MAT_HERMITIAN: 1614 case MAT_SYMMETRY_ETERNAL: 1615 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1616 break; 1617 default: 1618 SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op); 1619 } 1620 PetscFunctionReturn(0); 1621 } 1622 1623 #undef __FUNCT__ 1624 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1625 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1626 { 1627 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1628 Mat_SeqBAIJ *Aloc; 1629 Mat B; 1630 PetscErrorCode ierr; 1631 PetscInt M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1632 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1633 MatScalar *a; 1634 1635 PetscFunctionBegin; 1636 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1637 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1638 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1639 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1640 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1641 /* Do not know preallocation information, but must set block size */ 1642 ierr = 
MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr); 1643 } else { 1644 B = *matout; 1645 } 1646 1647 /* copy over the A part */ 1648 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1649 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1650 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1651 1652 for (i=0; i<mbs; i++) { 1653 rvals[0] = bs*(baij->rstartbs + i); 1654 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1655 for (j=ai[i]; j<ai[i+1]; j++) { 1656 col = (baij->cstartbs+aj[j])*bs; 1657 for (k=0; k<bs; k++) { 1658 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1659 1660 col++; a += bs; 1661 } 1662 } 1663 } 1664 /* copy over the B part */ 1665 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1666 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1667 for (i=0; i<mbs; i++) { 1668 rvals[0] = bs*(baij->rstartbs + i); 1669 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1670 for (j=ai[i]; j<ai[i+1]; j++) { 1671 col = baij->garray[aj[j]]*bs; 1672 for (k=0; k<bs; k++) { 1673 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1674 col++; 1675 a += bs; 1676 } 1677 } 1678 } 1679 ierr = PetscFree(rvals);CHKERRQ(ierr); 1680 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1681 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1682 1683 if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B; 1684 else { 1685 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1686 } 1687 PetscFunctionReturn(0); 1688 } 1689 1690 #undef __FUNCT__ 1691 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1692 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1693 { 1694 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1695 Mat a = baij->A,b = baij->B; 1696 PetscErrorCode ierr; 1697 PetscInt s1,s2,s3; 1698 1699 PetscFunctionBegin; 1700 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1701 if (rr) { 1702 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1703 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1704 /* Overlap communication with computation. 
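       The forward scatter of rr into baij->lvec is begun here and is completed
       only after the diagonal block has been scaled below.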
*/ 1705 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1706 } 1707 if (ll) { 1708 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1709 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1710 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1711 } 1712 /* scale the diagonal block */ 1713 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1714 1715 if (rr) { 1716 /* Do a scatter end and then right scale the off-diagonal block */ 1717 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1718 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1719 } 1720 PetscFunctionReturn(0); 1721 } 1722 1723 #undef __FUNCT__ 1724 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1725 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1726 { 1727 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1728 PetscInt *owners = A->rmap->range; 1729 PetscInt n = A->rmap->n; 1730 PetscMPIInt size = l->size; 1731 PetscSF sf; 1732 PetscInt *lrows; 1733 PetscSFNode *rrows; 1734 PetscInt lastidx = -1, r, p = 0, len = 0; 1735 PetscErrorCode ierr; 1736 1737 PetscFunctionBegin; 1738 /* Create SF where leaves are input rows and roots are owned rows */ 1739 ierr = PetscMalloc(n * sizeof(PetscInt), &lrows);CHKERRQ(ierr); 1740 for (r = 0; r < n; ++r) lrows[r] = -1; 1741 ierr = PetscMalloc(N * sizeof(PetscSFNode), &rrows);CHKERRQ(ierr); 1742 for (r = 0; r < N; ++r) { 1743 const PetscInt idx = rows[r]; 1744 PetscBool found = PETSC_FALSE; 1745 /* Trick for efficient searching for sorted rows */ 1746 if (lastidx > idx) p = 0; 1747 lastidx = idx; 1748 for (; p < size; ++p) { 1749 if (idx >= owners[p] && idx < owners[p+1]) { 1750 rrows[r].rank = p; 1751 rrows[r].index = rows[r] - owners[p]; 1752 found = PETSC_TRUE; 1753 break; 1754 } 1755 } 1756 if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx); 1757 } 1758 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1759 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1760 /* Collect flags for rows to be zeroed */ 1761 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1762 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1763 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1764 /* Compress and put in row numbers */ 1765 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1766 /* fix right hand side if needed */ 1767 if (x && b) { 1768 const PetscScalar *xx; 1769 PetscScalar *bb; 1770 1771 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1772 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1773 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 1774 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1775 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1776 } 1777 1778 /* actually zap the local rows */ 1779 /* 1780 Zero the required rows. If the "diagonal block" of the matrix 1781 is square and the user wishes to set the diagonal we use separate 1782 code so that MatSetValues() is not called for each diagonal allocating 1783 new memory, thus calling lots of mallocs and slowing things down. 
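       When the diagonal block is not square, any requested nonzero diagonal
       values are instead inserted afterwards with MatSetValues().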
1784 1785 */ 1786 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1787 ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,0,0);CHKERRQ(ierr); 1788 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1789 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,0,0);CHKERRQ(ierr); 1790 } else if (diag != 0.0) { 1791 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr); 1792 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1793 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1794 for (r = 0; r < len; ++r) { 1795 const PetscInt row = lrows[r] + A->rmap->rstart; 1796 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1797 } 1798 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1799 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1800 } else { 1801 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr); 1802 } 1803 ierr = PetscFree(lrows);CHKERRQ(ierr); 1804 PetscFunctionReturn(0); 1805 } 1806 1807 #undef __FUNCT__ 1808 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1809 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1810 { 1811 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1812 PetscErrorCode ierr; 1813 1814 PetscFunctionBegin; 1815 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1816 PetscFunctionReturn(0); 1817 } 1818 1819 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*); 1820 1821 #undef __FUNCT__ 1822 #define __FUNCT__ "MatEqual_MPIBAIJ" 1823 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1824 { 1825 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1826 Mat a,b,c,d; 1827 PetscBool flg; 1828 PetscErrorCode ierr; 1829 1830 PetscFunctionBegin; 1831 a = matA->A; b = matA->B; 1832 c = matB->A; d = matB->B; 1833 1834 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1835 if (flg) { 1836 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1837 } 1838 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1839 PetscFunctionReturn(0); 1840 } 1841 1842 #undef __FUNCT__ 1843 #define __FUNCT__ "MatCopy_MPIBAIJ" 1844 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1845 { 1846 PetscErrorCode ierr; 1847 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1848 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 1849 1850 PetscFunctionBegin; 1851 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
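       In that case we fall back to MatCopy_Basic(), which copies the values
       over one row at a time.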
*/ 1852 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1853 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1854 } else { 1855 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1856 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1857 } 1858 PetscFunctionReturn(0); 1859 } 1860 1861 #undef __FUNCT__ 1862 #define __FUNCT__ "MatSetUp_MPIBAIJ" 1863 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1864 { 1865 PetscErrorCode ierr; 1866 1867 PetscFunctionBegin; 1868 ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1869 PetscFunctionReturn(0); 1870 } 1871 1872 #undef __FUNCT__ 1873 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1874 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1875 { 1876 PetscErrorCode ierr; 1877 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data; 1878 PetscBLASInt bnz,one=1; 1879 Mat_SeqBAIJ *x,*y; 1880 1881 PetscFunctionBegin; 1882 if (str == SAME_NONZERO_PATTERN) { 1883 PetscScalar alpha = a; 1884 x = (Mat_SeqBAIJ*)xx->A->data; 1885 y = (Mat_SeqBAIJ*)yy->A->data; 1886 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 1887 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1888 x = (Mat_SeqBAIJ*)xx->B->data; 1889 y = (Mat_SeqBAIJ*)yy->B->data; 1890 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 1891 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1892 } else { 1893 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1894 } 1895 PetscFunctionReturn(0); 1896 } 1897 1898 #undef __FUNCT__ 1899 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1900 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1901 { 1902 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1903 PetscErrorCode ierr; 1904 1905 PetscFunctionBegin; 1906 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1907 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1908 PetscFunctionReturn(0); 1909 } 1910 1911 #undef __FUNCT__ 1912 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 1913 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1914 { 1915 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1916 PetscErrorCode ierr; 1917 1918 PetscFunctionBegin; 1919 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1920 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1921 PetscFunctionReturn(0); 1922 } 1923 1924 #undef __FUNCT__ 1925 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 1926 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1927 { 1928 PetscErrorCode ierr; 1929 IS iscol_local; 1930 PetscInt csize; 1931 1932 PetscFunctionBegin; 1933 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1934 if (call == MAT_REUSE_MATRIX) { 1935 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1936 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1937 } else { 1938 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1939 } 1940 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1941 if (call == MAT_INITIAL_MATRIX) { 1942 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1943 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 1944 } 1945 PetscFunctionReturn(0); 1946 } 1947 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*); 1948 #undef __FUNCT__ 1949 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 1950 /* 1951 Not great since it makes two copies of the 
submatrix: first a SeqBAIJ matrix is created from the local rows, and then the
    end result is obtained by concatenating these local matrices.
    Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
*/
PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
  Mat            M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm;
  IS             isrow_new,iscol_new;
  PetscBool      idflag,allrows,allcols;
  Mat_SeqBAIJ    *aij;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* The compression and expansion should be avoided: it does not flag errors
     and it may change the indices, hence it is buggy */
  ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
  ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);

  /* Check for special case: each processor gets entire matrix columns */
  ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
  if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE;
  else allcols = PETSC_FALSE;

  ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
  if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE;
  else allrows = PETSC_FALSE;

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
  } else {
    ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
  }
  ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
  /*
    m - number of local rows
    n - number of columns (same on all processors)
    rstart - first row in new global matrix generated
  */
  ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  m    = m/bs;
  n    = n/bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
      Determine the number of non-zeros in the diagonal and off-diagonal
      portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n*bs) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize/bs;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr = PetscMalloc2(m+1,PetscInt,&dlens,m+1,PetscInt,&olens);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
      The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
      rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart/bs + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz*bs*bs;
    ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIBAIJ"
PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       clocal_size,nrows;
  const PetscInt *rows;
  PetscMPIInt    size;
  IS             crowp,lcolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
  }
  ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
  /* make a local version of 'colp' */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
  if (size==1) {
    lcolp = colp;
  } else {
    ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  /* now we just get the submatrix */
  ierr = MatGetLocalSize(A,PETSC_NULL,&clocal_size);CHKERRQ(ierr);
  ierr = MatGetSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  /* clean up */
  if
(pcomm!=comm) { 2129 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2130 } 2131 if (size>1) { 2132 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 2133 } 2134 PetscFunctionReturn(0); 2135 } 2136 2137 #undef __FUNCT__ 2138 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2139 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2140 { 2141 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2142 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2143 2144 PetscFunctionBegin; 2145 if (nghosts) *nghosts = B->nbs; 2146 if (ghosts) *ghosts = baij->garray; 2147 PetscFunctionReturn(0); 2148 } 2149 2150 #undef __FUNCT__ 2151 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ" 2152 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2153 { 2154 Mat B; 2155 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2156 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2157 Mat_SeqAIJ *b; 2158 PetscErrorCode ierr; 2159 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2160 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2161 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2162 2163 PetscFunctionBegin; 2164 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2165 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2166 2167 /* ---------------------------------------------------------------- 2168 Tell every processor the number of nonzeros per row 2169 */ 2170 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2171 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2172 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2173 } 2174 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2175 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2176 displs = recvcounts + size; 2177 for (i=0; i<size; i++) { 2178 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2179 displs[i] = A->rmap->range[i]/bs; 2180 } 2181 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2182 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2183 #else 2184 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2185 #endif 2186 /* --------------------------------------------------------------- 2187 Create the sequential matrix of the same type as the local block diagonal 2188 */ 2189 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2190 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2191 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2192 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2193 b = (Mat_SeqAIJ*)B->data; 2194 2195 /*-------------------------------------------------------------------- 2196 Copy my part of matrix column indices over 2197 */ 2198 sendcount = ad->nz + bd->nz; 2199 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2200 a_jsendbuf = ad->j; 2201 b_jsendbuf = bd->j; 2202 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2203 cnt = 0; 2204 for (i=0; i<n; i++) { 2205 2206 /* put in lower diagonal portion */ 2207 m = bd->i[i+1] - bd->i[i]; 2208 while (m > 0) { 2209 /* is it above diagonal (in bd (compressed) numbering) */ 2210 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2211 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2212 m--; 2213 } 2214 2215 
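    /* the m entries of this block row of B that remain after the loop above
       lie to the right of the diagonal block; they are appended after the
       diagonal portion is copied in below */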
    /* put in diagonal portion */
    for (j=ad->i[i]; j<ad->i[i+1]; j++) {
      jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
    }

    /* put in upper diagonal portion */
    while (m-- > 0) {
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
    }
  }
  if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);

  /*--------------------------------------------------------------------
    Gather all column indices to all processors
  */
  for (i=0; i<size; i++) {
    recvcounts[i] = 0;
    for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
      recvcounts[i] += lens[j];
    }
  }
  displs[0] = 0;
  for (i=1; i<size; i++) {
    displs[i] = displs[i-1] + recvcounts[i-1];
  }
#if defined(PETSC_HAVE_MPI_IN_PLACE)
  ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#else
  ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#endif
  /*--------------------------------------------------------------------
    Assemble the matrix into usable form (note numerical values not yet set)
  */
  /* set the b->ilen (length of each row) values */
  ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
  /* set the b->i indices */
  b->i[0] = 0;
  for (i=1; i<=A->rmap->N/bs; i++) {
    b->i[i] = b->i[i-1] + lens[i-1];
  }
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(recvcounts);CHKERRQ(ierr);

  if (A->symmetric) {
    ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
  } else if (A->hermitian) {
    ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
  } else if (A->structurally_symmetric) {
    ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
  }
  *newmat = B;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPIBAIJ"
PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2305 } 2306 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2307 if (flag & SOR_ZERO_INITIAL_GUESS) { 2308 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2309 its--; 2310 } 2311 while (its--) { 2312 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2313 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2314 2315 /* update rhs: bb1 = bb - B*x */ 2316 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2317 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2318 2319 /* local sweep */ 2320 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2321 } 2322 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2323 if (flag & SOR_ZERO_INITIAL_GUESS) { 2324 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2325 its--; 2326 } 2327 while (its--) { 2328 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2329 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2330 2331 /* update rhs: bb1 = bb - B*x */ 2332 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2333 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2334 2335 /* local sweep */ 2336 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2337 } 2338 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2339 2340 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2341 PetscFunctionReturn(0); 2342 } 2343 2344 #undef __FUNCT__ 2345 #define __FUNCT__ "MatGetColumnNorms_MPIBAIJ" 2346 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms) 2347 { 2348 PetscErrorCode ierr; 2349 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)A->data; 2350 PetscInt N,i,*garray = aij->garray; 2351 PetscInt ib,jb,bs = A->rmap->bs; 2352 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) aij->A->data; 2353 MatScalar *a_val = a_aij->a; 2354 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ*) aij->B->data; 2355 MatScalar *b_val = b_aij->a; 2356 PetscReal *work; 2357 2358 PetscFunctionBegin; 2359 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 2360 ierr = PetscMalloc(N*sizeof(PetscReal),&work);CHKERRQ(ierr); 2361 ierr = PetscMemzero(work,N*sizeof(PetscReal));CHKERRQ(ierr); 2362 if (type == NORM_2) { 2363 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2364 for (jb=0; jb<bs; jb++) { 2365 for (ib=0; ib<bs; ib++) { 2366 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2367 a_val++; 2368 } 2369 } 2370 } 2371 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2372 for (jb=0; jb<bs; jb++) { 2373 for (ib=0; ib<bs; ib++) { 2374 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2375 b_val++; 2376 } 2377 } 2378 } 2379 } else if (type == NORM_1) { 2380 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2381 for (jb=0; jb<bs; jb++) { 2382 for (ib=0; ib<bs; ib++) { 2383 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2384 a_val++; 2385 } 2386 } 2387 } 2388 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2389 for (jb=0; jb<bs; jb++) { 2390 for (ib=0; ib<bs; ib++) { 2391 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2392 b_val++; 2393 } 2394 } 2395 } 2396 } else if (type == NORM_INFINITY) { 2397 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2398 for (jb=0; jb<bs; jb++) { 2399 for (ib=0; 
ib<bs; ib++) { 2400 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2401 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2402 a_val++; 2403 } 2404 } 2405 } 2406 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2407 for (jb=0; jb<bs; jb++) { 2408 for (ib=0; ib<bs; ib++) { 2409 int col = garray[b_aij->j[i]] * bs + jb; 2410 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2411 b_val++; 2412 } 2413 } 2414 } 2415 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 2416 if (type == NORM_INFINITY) { 2417 ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2418 } else { 2419 ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2420 } 2421 ierr = PetscFree(work);CHKERRQ(ierr); 2422 if (type == NORM_2) { 2423 for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]); 2424 } 2425 PetscFunctionReturn(0); 2426 } 2427 2428 #undef __FUNCT__ 2429 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ" 2430 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2431 { 2432 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2433 PetscErrorCode ierr; 2434 2435 PetscFunctionBegin; 2436 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2437 PetscFunctionReturn(0); 2438 } 2439 2440 2441 /* -------------------------------------------------------------------*/ 2442 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2443 MatGetRow_MPIBAIJ, 2444 MatRestoreRow_MPIBAIJ, 2445 MatMult_MPIBAIJ, 2446 /* 4*/ MatMultAdd_MPIBAIJ, 2447 MatMultTranspose_MPIBAIJ, 2448 MatMultTransposeAdd_MPIBAIJ, 2449 0, 2450 0, 2451 0, 2452 /*10*/ 0, 2453 0, 2454 0, 2455 MatSOR_MPIBAIJ, 2456 MatTranspose_MPIBAIJ, 2457 /*15*/ MatGetInfo_MPIBAIJ, 2458 MatEqual_MPIBAIJ, 2459 MatGetDiagonal_MPIBAIJ, 2460 MatDiagonalScale_MPIBAIJ, 2461 MatNorm_MPIBAIJ, 2462 /*20*/ MatAssemblyBegin_MPIBAIJ, 2463 MatAssemblyEnd_MPIBAIJ, 2464 MatSetOption_MPIBAIJ, 2465 MatZeroEntries_MPIBAIJ, 2466 /*24*/ MatZeroRows_MPIBAIJ, 2467 0, 2468 0, 2469 0, 2470 0, 2471 /*29*/ MatSetUp_MPIBAIJ, 2472 0, 2473 0, 2474 0, 2475 0, 2476 /*34*/ MatDuplicate_MPIBAIJ, 2477 0, 2478 0, 2479 0, 2480 0, 2481 /*39*/ MatAXPY_MPIBAIJ, 2482 MatGetSubMatrices_MPIBAIJ, 2483 MatIncreaseOverlap_MPIBAIJ, 2484 MatGetValues_MPIBAIJ, 2485 MatCopy_MPIBAIJ, 2486 /*44*/ 0, 2487 MatScale_MPIBAIJ, 2488 0, 2489 0, 2490 0, 2491 /*49*/ 0, 2492 0, 2493 0, 2494 0, 2495 0, 2496 /*54*/ MatFDColoringCreate_MPIXAIJ, 2497 0, 2498 MatSetUnfactored_MPIBAIJ, 2499 MatPermute_MPIBAIJ, 2500 MatSetValuesBlocked_MPIBAIJ, 2501 /*59*/ MatGetSubMatrix_MPIBAIJ, 2502 MatDestroy_MPIBAIJ, 2503 MatView_MPIBAIJ, 2504 0, 2505 0, 2506 /*64*/ 0, 2507 0, 2508 0, 2509 0, 2510 0, 2511 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2512 0, 2513 0, 2514 0, 2515 0, 2516 /*74*/ 0, 2517 MatFDColoringApply_BAIJ, 2518 0, 2519 0, 2520 0, 2521 /*79*/ 0, 2522 0, 2523 0, 2524 0, 2525 MatLoad_MPIBAIJ, 2526 /*84*/ 0, 2527 0, 2528 0, 2529 0, 2530 0, 2531 /*89*/ 0, 2532 0, 2533 0, 2534 0, 2535 0, 2536 /*94*/ 0, 2537 0, 2538 0, 2539 0, 2540 0, 2541 /*99*/ 0, 2542 0, 2543 0, 2544 0, 2545 0, 2546 /*104*/0, 2547 MatRealPart_MPIBAIJ, 2548 MatImaginaryPart_MPIBAIJ, 2549 0, 2550 0, 2551 /*109*/0, 2552 0, 2553 0, 2554 0, 2555 0, 2556 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2557 0, 2558 MatGetGhosts_MPIBAIJ, 2559 0, 2560 0, 2561 /*119*/0, 2562 0, 2563 0, 2564 0, 2565 MatGetMultiProcBlock_MPIBAIJ, 2566 /*124*/0, 2567 MatGetColumnNorms_MPIBAIJ, 2568 MatInvertBlockDiagonal_MPIBAIJ, 2569 0, 
2570 0, 2571 /*129*/ 0, 2572 0, 2573 0, 2574 0, 2575 0, 2576 /*134*/ 0, 2577 0, 2578 0, 2579 0, 2580 0, 2581 /*139*/ 0, 2582 0, 2583 0, 2584 MatFDColoringSetUp_MPIXAIJ 2585 }; 2586 2587 #undef __FUNCT__ 2588 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2589 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2590 { 2591 PetscFunctionBegin; 2592 *a = ((Mat_MPIBAIJ*)A->data)->A; 2593 PetscFunctionReturn(0); 2594 } 2595 2596 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2597 2598 #undef __FUNCT__ 2599 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2600 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2601 { 2602 PetscInt m,rstart,cstart,cend; 2603 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2604 const PetscInt *JJ =0; 2605 PetscScalar *values=0; 2606 PetscBool roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented; 2607 PetscErrorCode ierr; 2608 2609 PetscFunctionBegin; 2610 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2611 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2612 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2613 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2614 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2615 m = B->rmap->n/bs; 2616 rstart = B->rmap->rstart/bs; 2617 cstart = B->cmap->rstart/bs; 2618 cend = B->cmap->rend/bs; 2619 2620 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2621 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 2622 for (i=0; i<m; i++) { 2623 nz = ii[i+1] - ii[i]; 2624 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2625 nz_max = PetscMax(nz_max,nz); 2626 JJ = jj + ii[i]; 2627 for (j=0; j<nz; j++) { 2628 if (*JJ >= cstart) break; 2629 JJ++; 2630 } 2631 d = 0; 2632 for (; j<nz; j++) { 2633 if (*JJ++ >= cend) break; 2634 d++; 2635 } 2636 d_nnz[i] = d; 2637 o_nnz[i] = nz - d; 2638 } 2639 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2640 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2641 2642 values = (PetscScalar*)V; 2643 if (!values) { 2644 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2645 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2646 } 2647 for (i=0; i<m; i++) { 2648 PetscInt row = i + rstart; 2649 PetscInt ncols = ii[i+1] - ii[i]; 2650 const PetscInt *icols = jj + ii[i]; 2651 if (!roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2652 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2653 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2654 } else { /* block ordering does not match so we can only insert one block at a time. */ 2655 PetscInt j; 2656 for (j=0; j<ncols; j++) { 2657 const PetscScalar *svals = values + (V ? 
(bs*bs*(ii[i]+j)) : 0); 2658 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2659 } 2660 } 2661 } 2662 2663 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2664 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2665 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2666 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2667 PetscFunctionReturn(0); 2668 } 2669 2670 #undef __FUNCT__ 2671 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2672 /*@C 2673 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2674 (the default parallel PETSc format). 2675 2676 Collective on MPI_Comm 2677 2678 Input Parameters: 2679 + A - the matrix 2680 . bs - the block size 2681 . i - the indices into j for the start of each local row (starts with zero) 2682 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2683 - v - optional values in the matrix 2684 2685 Level: developer 2686 2687 Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 2688 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 2689 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2690 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2691 block column and the second index is over columns within a block. 2692 2693 .keywords: matrix, aij, compressed row, sparse, parallel 2694 2695 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ 2696 @*/ 2697 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2698 { 2699 PetscErrorCode ierr; 2700 2701 PetscFunctionBegin; 2702 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2703 PetscValidType(B,1); 2704 PetscValidLogicalCollectiveInt(B,bs,2); 2705 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2706 PetscFunctionReturn(0); 2707 } 2708 2709 #undef __FUNCT__ 2710 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2711 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2712 { 2713 Mat_MPIBAIJ *b; 2714 PetscErrorCode ierr; 2715 PetscInt i; 2716 2717 PetscFunctionBegin; 2718 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2719 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2720 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2721 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2722 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2723 2724 if (d_nnz) { 2725 for (i=0; i<B->rmap->n/bs; i++) { 2726 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2727 } 2728 } 2729 if (o_nnz) { 2730 for (i=0; i<B->rmap->n/bs; i++) { 2731 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2732 } 2733 } 2734 2735 b = (Mat_MPIBAIJ*)B->data; 2736 b->bs2 = bs*bs; 2737 b->mbs = B->rmap->n/bs; 2738 b->nbs = B->cmap->n/bs; 2739 b->Mbs = B->rmap->N/bs; 2740 b->Nbs 
= B->cmap->N/bs;

  for (i=0; i<=b->size; i++) {
    b->rangebs[i] = B->rmap->range[i]/bs;
  }
  b->rstartbs = B->rmap->rstart/bs;
  b->rendbs   = B->rmap->rend/bs;
  b->cstartbs = B->cmap->rstart/bs;
  b->cendbs   = B->cmap->rend/bs;

  if (!B->preallocated) {
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
    ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
    ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
  }

  ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);

#undef __FUNCT__
#define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
  PetscErrorCode ierr;
  Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
  PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  ierr  = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
  ii[0] = 0;
  for (i=0; i<M; i++) {
    if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
    if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
    ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
    /* remove one from the count if the matrix has a diagonal entry in this row */
    for (j=id[i]; j<id[i+1]; j++) {
      if (jd[j] == i) {ii[i+1]--;break;}
    }
  }
  ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<M; i++) {
    for (j=io[i]; j<io[i+1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
    }
    for (k=id[i]; k<id[i+1]; k++) {
      if (jd[k] != i) {
        jj[cnt++] = rstart + jd[k];
      }
    }
    for (; j<io[i+1]; j++) {
      jj[cnt++] = garray[jo[j]];
    }
  }
  ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include <../src/mat/impls/aij/mpi/mpiaij.h>

PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);

#undef __FUNCT__
#define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  Mat            B;
  Mat_MPIAIJ     *b;

  PetscFunctionBegin;
  if (!A->assembled)
SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled"); 2828 2829 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2830 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2831 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2832 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 2833 ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr); 2834 b = (Mat_MPIAIJ*) B->data; 2835 2836 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 2837 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2838 ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 2839 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 2840 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 2841 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2842 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2843 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2844 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2845 if (reuse == MAT_REUSE_MATRIX) { 2846 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 2847 } else { 2848 *newmat = B; 2849 } 2850 PetscFunctionReturn(0); 2851 } 2852 2853 #if defined(PETSC_HAVE_MUMPS) 2854 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 2855 #endif 2856 2857 /*MC 2858 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2859 2860 Options Database Keys: 2861 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2862 . -mat_block_size <bs> - set the blocksize used to store the matrix 2863 - -mat_use_hash_table <fact> 2864 2865 Level: beginner 2866 2867 .seealso: MatCreateMPIBAIJ 2868 M*/ 2869 2870 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 2871 2872 #undef __FUNCT__ 2873 #define __FUNCT__ "MatCreate_MPIBAIJ" 2874 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2875 { 2876 Mat_MPIBAIJ *b; 2877 PetscErrorCode ierr; 2878 PetscBool flg; 2879 2880 PetscFunctionBegin; 2881 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2882 B->data = (void*)b; 2883 2884 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2885 B->assembled = PETSC_FALSE; 2886 2887 B->insertmode = NOT_SET_VALUES; 2888 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 2889 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 2890 2891 /* build local table of row and column ownerships */ 2892 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 2893 2894 /* build cache for off array entries formed */ 2895 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 2896 2897 b->donotstash = PETSC_FALSE; 2898 b->colmap = NULL; 2899 b->garray = NULL; 2900 b->roworiented = PETSC_TRUE; 2901 2902 /* stuff used in block assembly */ 2903 b->barray = 0; 2904 2905 /* stuff used for matrix vector multiply */ 2906 b->lvec = 0; 2907 b->Mvctx = 0; 2908 2909 /* stuff for MatGetRow() */ 2910 b->rowindices = 0; 2911 b->rowvalues = 0; 2912 b->getrowactive = PETSC_FALSE; 2913 2914 /* hash table stuff */ 2915 b->ht = 0; 2916 b->hd = 0; 2917 b->ht_size = 0; 2918 b->ht_flag = PETSC_FALSE; 2919 b->ht_fact = 0; 2920 b->ht_total_ct = 0; 2921 b->ht_insert_ct = 0; 2922 2923 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 2924 b->ijonly = PETSC_FALSE; 2925 2926 ierr = 
PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 2927 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr); 2928 if (flg) { 2929 PetscReal fact = 1.39; 2930 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2931 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 2932 if (fact <= 1.0) fact = 1.39; 2933 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2934 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2935 } 2936 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2937 2938 #if defined(PETSC_HAVE_MUMPS) 2939 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_baij_mumps);CHKERRQ(ierr); 2940 #endif 2941 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 2942 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 2943 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 2944 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2945 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2946 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2947 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 2948 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 2949 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 2950 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 2951 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr); 2952 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 2953 PetscFunctionReturn(0); 2954 } 2955 2956 /*MC 2957 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2958 2959 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2960 and MATMPIBAIJ otherwise. 2961 2962 Options Database Keys: 2963 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2964 2965 Level: beginner 2966 2967 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2968 M*/ 2969 2970 #undef __FUNCT__ 2971 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 2972 /*@C 2973 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 2974 (block compressed row). For good matrix assembly performance 2975 the user should preallocate the matrix storage by setting the parameters 2976 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2977 performance can be increased by more than a factor of 50. 2978 2979 Collective on Mat 2980 2981 Input Parameters: 2982 + A - the matrix 2983 . bs - size of block 2984 . 
d_nz  - number of block nonzeros per block row in diagonal portion of local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
           set it even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact>

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o  o  o
   row 4  |o o o d d d o o o o  o  o
   row 5  |o o o d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs, nz_allocated, nz_used, nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.
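
   Example usage, as a minimal sketch (the communicator comm, the block size
   bs=2, the ten local block rows, and the estimates of 5 diagonal-part and
   2 off-diagonal-part block nonzeros per block row are illustrative
   assumptions, not recommendations):
.vb
   Mat      A;
   PetscInt bs = 2, m = 10;
   MatCreate(comm,&A);
   MatSetSizes(A,bs*m,bs*m,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetType(A,MATMPIBAIJ);
   MatMPIBAIJSetPreallocation(A,bs,5,NULL,2,NULL);
.ve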

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateBAIJ"
/*@C
   MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzero blocks per block row in diagonal portion of local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
.  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact>

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   A nonzero block is any block that has one or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
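
   For example, the following is a minimal sketch that creates a matrix with
   3x3 blocks and eight local block rows and columns per process; the fill
   estimates of 4 diagonal-part and 1 off-diagonal-part nonzero blocks per
   block row are illustrative assumptions:
.vb
   Mat A;
   MatCreateBAIJ(PETSC_COMM_WORLD,3,8*3,8*3,PETSC_DETERMINE,PETSC_DETERMINE,4,NULL,1,NULL,&A);
.ve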
   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o  o  o
   row 4  |o o o d d d o o o o  o  o
   row 5  |o o o d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
@*/
PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIBAIJ"
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factortype = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a             = (Mat_MPIBAIJ*)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2        = oldmat->bs2;
  a->mbs        = oldmat->mbs;
  a->nbs        = oldmat->nbs;
  a->Mbs        = oldmat->Mbs;
  a->Nbs        = oldmat->Nbs;

  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIBAIJ"
PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  int            fd;
  PetscInt       i,nz,j,rstart,rend;
  PetscScalar    *vals,*buf;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
  PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
  PetscInt       jj,*mycols,*ibuf,bs = 1,Mbs,mbs,extra_rows,mmax;
  PetscMPIInt    tag = ((PetscObject)viewer)->tag;
  PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
  PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset = 1,grows,gcols;
  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
  }

  if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M    = header[1]; N = header[2];

  /* If global sizes are set to PETSC_DECIDE, set them to the sizes given in the file */
  if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
  if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;

  /* If global sizes are set, check that they are consistent with those given in the file */
  if (sizesset) {
    ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
  }
  if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
  if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);

  if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*Mbs;
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows && !rank) {
    ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
    mbs = Mbs/size + ((Mbs % size) > rank);
    m   = mbs*bs;
  } else { /* User set */
    m   = newmat->rmap->n;
    mbs = m/bs;
  }
  ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* process 0 needs enough room for the process with the most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<=size; i++) {
      mmax = PetscMax(mmax,rowners[i]);
    }
    mmax *= bs;
  } else mmax = -1; /* unused, but compiler warns anyway */

  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
  for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc(m*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    mend = m;
    if (size == 1) mend = mend - extra_rows;
    ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
    for (j=mend; j<m; j++) locrowlens[j] = 1;
    ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += locrowlens[j];
    }
    for (i=1; i<size; i++) {
      mend = browners[i+1] - browners[i];
      if (i == size-1) mend = mend - extra_rows;
      ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
      for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<browners[i+1]-browners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = procsnz[0];
    for (i=1; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1) {
      for (i=0; i<extra_rows; i++) mycols[nz+i] = M+i;
    }

    /* read in every other processor's part (except the last) and ship it off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += locrowlens[i];
    }
    ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
  ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
  ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs;
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }

  if (!sizesset) {
    ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
  }
  ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) {
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
    }

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in the other processors' parts (except the last one) and ship them out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1) {
      nz   = procsnz[i] - extra_rows; /* i is size-1 here, left over from the loop above */
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
  ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
  ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the hash table.

   Not Collective; each process can use a different factor

   Input Parameters:
+  mat  - the matrix
-  fact - the hash table factor

   Level: advanced

   Notes:
   This can also be set by the command line option -mat_use_hash_table <fact>

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  baij          = (Mat_MPIBAIJ*)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran (to eliminate two function call overheads)
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
#endif

#undef __FUNCT__
#define __FUNCT__ "matmpibaijsetvaluesblocked_"
/*@C
   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace a call to MatSetValuesBlocked()

   Collective on Mat

   Input Parameters:
+  mat    - the matrix
.  min    - number of input rows
.  im     - input rows
.  nin    - number of input columns
.  in     - input columns
.  v      - numerical values input
-  addvin - INSERT_VALUES or ADD_VALUES

   Notes:
   This duplicates MatSetValuesBlocked_MPIBAIJ() in its entirety, which is a terrible failure of code reuse.
   Level: advanced

.seealso: MatSetValuesBlocked()
@*/
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
{
  /* convert input arguments to C version */
  Mat        mat  = *matin;
  PetscInt   m    = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray     = baij->barray;
  PetscBool       roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
#endif
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);

  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) stepval = (n-1)*bs;
  else             stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* if there is only one input column (row-oriented) or one input row
           (column-oriented) the block is already contiguous, so no copy is required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIBAIJWithArrays"
/*@
   MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the
   local rows in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
   block, followed by the second column of the first block, and so on. That is, the blocks are contiguous in memory
   with column-major ordering within blocks.

   The i and j indices are 0 based; the entries of i are offsets into the local j (and a) arrays.
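   As an illustrative sketch (with made-up numbers), a process whose two local rows of a
   matrix with bs=1 are
.vb
   1 0 0
   2 0 3
.ve
   would pass m=2, i = {0,1,3}, j = {0,0,2}, and a = {1.0,2.0,3.0}.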
.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}