1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <petscblaslapack.h> 4 5 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 6 extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat); 7 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 8 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 9 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 10 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 11 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 12 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec); 13 14 #undef __FUNCT__ 15 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ" 16 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 17 { 18 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 19 PetscErrorCode ierr; 20 PetscInt i,*idxb = 0; 21 PetscScalar *va,*vb; 22 Vec vtmp; 23 24 PetscFunctionBegin; 25 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 26 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 27 if (idx) { 28 for (i=0; i<A->rmap->n; i++) { 29 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 30 } 31 } 32 33 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 34 if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);} 35 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 36 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 37 38 for (i=0; i<A->rmap->n; i++) { 39 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 40 va[i] = vb[i]; 41 if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs); 42 } 43 } 44 45 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 46 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 47 ierr = PetscFree(idxb);CHKERRQ(ierr); 48 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 49 PetscFunctionReturn(0); 50 } 51 52 #undef __FUNCT__ 53 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 54 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 55 { 56 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 57 PetscErrorCode ierr; 58 59 PetscFunctionBegin; 60 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 61 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 62 PetscFunctionReturn(0); 63 } 64 65 #undef __FUNCT__ 66 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 67 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 68 { 69 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 70 PetscErrorCode ierr; 71 72 PetscFunctionBegin; 73 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 74 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 75 PetscFunctionReturn(0); 76 } 77 78 /* 79 Local utility routine that creates a mapping from the global column 80 number to the local number in the off-diagonal part of the local 81 storage of the matrix. This is done in a non scalable way since the 82 length of colmap equals the global matrix length. 
83 */ 84 #undef __FUNCT__ 85 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private" 86 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 87 { 88 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 89 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 90 PetscErrorCode ierr; 91 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 92 93 PetscFunctionBegin; 94 #if defined(PETSC_USE_CTABLE) 95 ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr); 96 for (i=0; i<nbs; i++) { 97 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr); 98 } 99 #else 100 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 101 ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 102 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 103 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 104 #endif 105 PetscFunctionReturn(0); 106 } 107 108 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 109 { \ 110 \ 111 brow = row/bs; \ 112 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 113 rmax = aimax[brow]; nrow = ailen[brow]; \ 114 bcol = col/bs; \ 115 ridx = row % bs; cidx = col % bs; \ 116 low = 0; high = nrow; \ 117 while (high-low > 3) { \ 118 t = (low+high)/2; \ 119 if (rp[t] > bcol) high = t; \ 120 else low = t; \ 121 } \ 122 for (_i=low; _i<high; _i++) { \ 123 if (rp[_i] > bcol) break; \ 124 if (rp[_i] == bcol) { \ 125 bap = ap + bs2*_i + bs*cidx + ridx; \ 126 if (addv == ADD_VALUES) *bap += value; \ 127 else *bap = value; \ 128 goto a_noinsert; \ 129 } \ 130 } \ 131 if (a->nonew == 1) goto a_noinsert; \ 132 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 133 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 134 N = nrow++ - 1; \ 135 /* shift up all the later entries in this row */ \ 136 for (ii=N; ii>=_i; ii--) { \ 137 rp[ii+1] = rp[ii]; \ 138 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 139 } \ 140 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 141 rp[_i] = bcol; \ 142 ap[bs2*_i + bs*cidx + ridx] = value; \ 143 a_noinsert:; \ 144 ailen[brow] = nrow; \ 145 } 146 147 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 148 { \ 149 brow = row/bs; \ 150 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 151 rmax = bimax[brow]; nrow = bilen[brow]; \ 152 bcol = col/bs; \ 153 ridx = row % bs; cidx = col % bs; \ 154 low = 0; high = nrow; \ 155 while (high-low > 3) { \ 156 t = (low+high)/2; \ 157 if (rp[t] > bcol) high = t; \ 158 else low = t; \ 159 } \ 160 for (_i=low; _i<high; _i++) { \ 161 if (rp[_i] > bcol) break; \ 162 if (rp[_i] == bcol) { \ 163 bap = ap + bs2*_i + bs*cidx + ridx; \ 164 if (addv == ADD_VALUES) *bap += value; \ 165 else *bap = value; \ 166 goto b_noinsert; \ 167 } \ 168 } \ 169 if (b->nonew == 1) goto b_noinsert; \ 170 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 171 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 172 N = nrow++ - 1; \ 173 /* shift up all the later entries in this row */ \ 174 for (ii=N; ii>=_i; ii--) { \ 175 rp[ii+1] = rp[ii]; \ 176 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 177 } \ 178 if (N>=_i) { ierr = 
PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 179 rp[_i] = bcol; \ 180 ap[bs2*_i + bs*cidx + ridx] = value; \ 181 b_noinsert:; \ 182 bilen[brow] = nrow; \ 183 } 184 185 #undef __FUNCT__ 186 #define __FUNCT__ "MatSetValues_MPIBAIJ" 187 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 188 { 189 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 190 MatScalar value; 191 PetscBool roworiented = baij->roworiented; 192 PetscErrorCode ierr; 193 PetscInt i,j,row,col; 194 PetscInt rstart_orig=mat->rmap->rstart; 195 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 196 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 197 198 /* Some Variables required in the macro */ 199 Mat A = baij->A; 200 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 201 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 202 MatScalar *aa =a->a; 203 204 Mat B = baij->B; 205 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 206 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 207 MatScalar *ba =b->a; 208 209 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 210 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 211 MatScalar *ap,*bap; 212 213 PetscFunctionBegin; 214 if (v) PetscValidScalarPointer(v,6); 215 for (i=0; i<m; i++) { 216 if (im[i] < 0) continue; 217 #if defined(PETSC_USE_DEBUG) 218 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 219 #endif 220 if (im[i] >= rstart_orig && im[i] < rend_orig) { 221 row = im[i] - rstart_orig; 222 for (j=0; j<n; j++) { 223 if (in[j] >= cstart_orig && in[j] < cend_orig) { 224 col = in[j] - cstart_orig; 225 if (roworiented) value = v[i*n+j]; 226 else value = v[i+j*m]; 227 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 228 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 229 } else if (in[j] < 0) continue; 230 #if defined(PETSC_USE_DEBUG) 231 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 232 #endif 233 else { 234 if (mat->was_assembled) { 235 if (!baij->colmap) { 236 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 237 } 238 #if defined(PETSC_USE_CTABLE) 239 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 240 col = col - 1; 241 #else 242 col = baij->colmap[in[j]/bs] - 1; 243 #endif 244 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 245 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 246 col = in[j]; 247 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 248 B = baij->B; 249 b = (Mat_SeqBAIJ*)(B)->data; 250 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 251 ba =b->a; 252 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 253 else col += in[j]%bs; 254 } else col = in[j]; 255 if (roworiented) value = v[i*n+j]; 256 else value = v[i+j*m]; 257 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 258 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 259 } 260 } 261 } else { 262 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 263 if (!baij->donotstash) { 264 mat->assembled = PETSC_FALSE; 265 if (roworiented) { 266 ierr = 
MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 267 } else { 268 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 269 } 270 } 271 } 272 } 273 PetscFunctionReturn(0); 274 } 275 276 #undef __FUNCT__ 277 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 278 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 279 { 280 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 281 const PetscScalar *value; 282 MatScalar *barray = baij->barray; 283 PetscBool roworiented = baij->roworiented; 284 PetscErrorCode ierr; 285 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 286 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 287 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 288 289 PetscFunctionBegin; 290 if (!barray) { 291 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 292 baij->barray = barray; 293 } 294 295 if (roworiented) stepval = (n-1)*bs; 296 else stepval = (m-1)*bs; 297 298 for (i=0; i<m; i++) { 299 if (im[i] < 0) continue; 300 #if defined(PETSC_USE_DEBUG) 301 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 302 #endif 303 if (im[i] >= rstart && im[i] < rend) { 304 row = im[i] - rstart; 305 for (j=0; j<n; j++) { 306 /* If NumCol = 1 then a copy is not required */ 307 if ((roworiented) && (n == 1)) { 308 barray = (MatScalar*)v + i*bs2; 309 } else if ((!roworiented) && (m == 1)) { 310 barray = (MatScalar*)v + j*bs2; 311 } else { /* Here a copy is required */ 312 if (roworiented) { 313 value = v + (i*(stepval+bs) + j)*bs; 314 } else { 315 value = v + (j*(stepval+bs) + i)*bs; 316 } 317 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 318 for (jj=0; jj<bs; jj++) barray[jj] = value[jj]; 319 barray += bs; 320 } 321 barray -= bs2; 322 } 323 324 if (in[j] >= cstart && in[j] < cend) { 325 col = in[j] - cstart; 326 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 327 } else if (in[j] < 0) continue; 328 #if defined(PETSC_USE_DEBUG) 329 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 330 #endif 331 else { 332 if (mat->was_assembled) { 333 if (!baij->colmap) { 334 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 335 } 336 337 #if defined(PETSC_USE_DEBUG) 338 #if defined(PETSC_USE_CTABLE) 339 { PetscInt data; 340 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 341 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 342 } 343 #else 344 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 345 #endif 346 #endif 347 #if defined(PETSC_USE_CTABLE) 348 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 349 col = (col - 1)/bs; 350 #else 351 col = (baij->colmap[in[j]] - 1)/bs; 352 #endif 353 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 354 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 355 col = in[j]; 356 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]); 357 } else col = in[j]; 358 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 359 } 360 } 361 } else { 362 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though 
MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 363 if (!baij->donotstash) { 364 if (roworiented) { 365 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 366 } else { 367 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 368 } 369 } 370 } 371 } 372 PetscFunctionReturn(0); 373 } 374 375 #define HASH_KEY 0.6180339887 376 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 377 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 378 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 379 #undef __FUNCT__ 380 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 381 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 382 { 383 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 384 PetscBool roworiented = baij->roworiented; 385 PetscErrorCode ierr; 386 PetscInt i,j,row,col; 387 PetscInt rstart_orig=mat->rmap->rstart; 388 PetscInt rend_orig =mat->rmap->rend,Nbs=baij->Nbs; 389 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 390 PetscReal tmp; 391 MatScalar **HD = baij->hd,value; 392 #if defined(PETSC_USE_DEBUG) 393 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 394 #endif 395 396 PetscFunctionBegin; 397 if (v) PetscValidScalarPointer(v,6); 398 for (i=0; i<m; i++) { 399 #if defined(PETSC_USE_DEBUG) 400 if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 401 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 402 #endif 403 row = im[i]; 404 if (row >= rstart_orig && row < rend_orig) { 405 for (j=0; j<n; j++) { 406 col = in[j]; 407 if (roworiented) value = v[i*n+j]; 408 else value = v[i+j*m]; 409 /* Look up PetscInto the Hash Table */ 410 key = (row/bs)*Nbs+(col/bs)+1; 411 h1 = HASH(size,key,tmp); 412 413 414 idx = h1; 415 #if defined(PETSC_USE_DEBUG) 416 insert_ct++; 417 total_ct++; 418 if (HT[idx] != key) { 419 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 420 if (idx == size) { 421 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 422 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 423 } 424 } 425 #else 426 if (HT[idx] != key) { 427 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 428 if (idx == size) { 429 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 430 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 431 } 432 } 433 #endif 434 /* A HASH table entry is found, so insert the values at the correct address */ 435 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 436 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 437 } 438 } else if (!baij->donotstash) { 439 if (roworiented) { 440 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 441 } else { 442 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 443 } 444 } 445 } 446 #if defined(PETSC_USE_DEBUG) 447 baij->ht_total_ct = total_ct; 448 baij->ht_insert_ct = insert_ct; 449 #endif 450 PetscFunctionReturn(0); 451 } 452 453 #undef __FUNCT__ 454 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 455 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt 
im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 456 { 457 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 458 PetscBool roworiented = baij->roworiented; 459 PetscErrorCode ierr; 460 PetscInt i,j,ii,jj,row,col; 461 PetscInt rstart=baij->rstartbs; 462 PetscInt rend =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 463 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 464 PetscReal tmp; 465 MatScalar **HD = baij->hd,*baij_a; 466 const PetscScalar *v_t,*value; 467 #if defined(PETSC_USE_DEBUG) 468 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 469 #endif 470 471 PetscFunctionBegin; 472 if (roworiented) stepval = (n-1)*bs; 473 else stepval = (m-1)*bs; 474 475 for (i=0; i<m; i++) { 476 #if defined(PETSC_USE_DEBUG) 477 if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 478 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 479 #endif 480 row = im[i]; 481 v_t = v + i*nbs2; 482 if (row >= rstart && row < rend) { 483 for (j=0; j<n; j++) { 484 col = in[j]; 485 486 /* Look up into the Hash Table */ 487 key = row*Nbs+col+1; 488 h1 = HASH(size,key,tmp); 489 490 idx = h1; 491 #if defined(PETSC_USE_DEBUG) 492 total_ct++; 493 insert_ct++; 494 if (HT[idx] != key) { 495 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 496 if (idx == size) { 497 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 498 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 499 } 500 } 501 #else 502 if (HT[idx] != key) { 503 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 504 if (idx == size) { 505 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 506 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 507 } 508 } 509 #endif 510 baij_a = HD[idx]; 511 if (roworiented) { 512 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 513 /* value = v + (i*(stepval+bs)+j)*bs; */ 514 value = v_t; 515 v_t += bs; 516 if (addv == ADD_VALUES) { 517 for (ii=0; ii<bs; ii++,value+=stepval) { 518 for (jj=ii; jj<bs2; jj+=bs) { 519 baij_a[jj] += *value++; 520 } 521 } 522 } else { 523 for (ii=0; ii<bs; ii++,value+=stepval) { 524 for (jj=ii; jj<bs2; jj+=bs) { 525 baij_a[jj] = *value++; 526 } 527 } 528 } 529 } else { 530 value = v + j*(stepval+bs)*bs + i*bs; 531 if (addv == ADD_VALUES) { 532 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 533 for (jj=0; jj<bs; jj++) { 534 baij_a[jj] += *value++; 535 } 536 } 537 } else { 538 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 539 for (jj=0; jj<bs; jj++) { 540 baij_a[jj] = *value++; 541 } 542 } 543 } 544 } 545 } 546 } else { 547 if (!baij->donotstash) { 548 if (roworiented) { 549 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 550 } else { 551 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 552 } 553 } 554 } 555 } 556 #if defined(PETSC_USE_DEBUG) 557 baij->ht_total_ct = total_ct; 558 baij->ht_insert_ct = insert_ct; 559 #endif 560 PetscFunctionReturn(0); 561 } 562 563 #undef __FUNCT__ 564 #define __FUNCT__ "MatGetValues_MPIBAIJ" 565 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 566 { 567 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 568 PetscErrorCode ierr; 569 PetscInt bs = mat->rmap->bs,i,j,bsrstart = 
mat->rmap->rstart,bsrend = mat->rmap->rend; 570 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 571 572 PetscFunctionBegin; 573 for (i=0; i<m; i++) { 574 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 575 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 576 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 577 row = idxm[i] - bsrstart; 578 for (j=0; j<n; j++) { 579 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 580 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 581 if (idxn[j] >= bscstart && idxn[j] < bscend) { 582 col = idxn[j] - bscstart; 583 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 584 } else { 585 if (!baij->colmap) { 586 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 587 } 588 #if defined(PETSC_USE_CTABLE) 589 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 590 data--; 591 #else 592 data = baij->colmap[idxn[j]/bs]-1; 593 #endif 594 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 595 else { 596 col = data + idxn[j]%bs; 597 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 598 } 599 } 600 } 601 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 602 } 603 PetscFunctionReturn(0); 604 } 605 606 #undef __FUNCT__ 607 #define __FUNCT__ "MatNorm_MPIBAIJ" 608 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 609 { 610 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 611 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 612 PetscErrorCode ierr; 613 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 614 PetscReal sum = 0.0; 615 MatScalar *v; 616 617 PetscFunctionBegin; 618 if (baij->size == 1) { 619 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 620 } else { 621 if (type == NORM_FROBENIUS) { 622 v = amat->a; 623 nz = amat->nz*bs2; 624 for (i=0; i<nz; i++) { 625 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 626 } 627 v = bmat->a; 628 nz = bmat->nz*bs2; 629 for (i=0; i<nz; i++) { 630 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 631 } 632 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 633 *nrm = PetscSqrtReal(*nrm); 634 } else if (type == NORM_1) { /* max column sum */ 635 PetscReal *tmp,*tmp2; 636 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 637 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 638 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 639 v = amat->a; jj = amat->j; 640 for (i=0; i<amat->nz; i++) { 641 for (j=0; j<bs; j++) { 642 col = bs*(cstart + *jj) + j; /* column index */ 643 for (row=0; row<bs; row++) { 644 tmp[col] += PetscAbsScalar(*v); v++; 645 } 646 } 647 jj++; 648 } 649 v = bmat->a; jj = bmat->j; 650 for (i=0; i<bmat->nz; i++) { 651 for (j=0; j<bs; j++) { 652 col = bs*garray[*jj] + j; 653 for (row=0; row<bs; row++) { 654 tmp[col] += PetscAbsScalar(*v); v++; 655 } 656 } 657 jj++; 658 } 659 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 660 *nrm = 0.0; 661 for (j=0; j<mat->cmap->N; j++) { 662 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 663 } 664 ierr = 
PetscFree2(tmp,tmp2);CHKERRQ(ierr); 665 } else if (type == NORM_INFINITY) { /* max row sum */ 666 PetscReal *sums; 667 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr); 668 sum = 0.0; 669 for (j=0; j<amat->mbs; j++) { 670 for (row=0; row<bs; row++) sums[row] = 0.0; 671 v = amat->a + bs2*amat->i[j]; 672 nz = amat->i[j+1]-amat->i[j]; 673 for (i=0; i<nz; i++) { 674 for (col=0; col<bs; col++) { 675 for (row=0; row<bs; row++) { 676 sums[row] += PetscAbsScalar(*v); v++; 677 } 678 } 679 } 680 v = bmat->a + bs2*bmat->i[j]; 681 nz = bmat->i[j+1]-bmat->i[j]; 682 for (i=0; i<nz; i++) { 683 for (col=0; col<bs; col++) { 684 for (row=0; row<bs; row++) { 685 sums[row] += PetscAbsScalar(*v); v++; 686 } 687 } 688 } 689 for (row=0; row<bs; row++) { 690 if (sums[row] > sum) sum = sums[row]; 691 } 692 } 693 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 694 ierr = PetscFree(sums);CHKERRQ(ierr); 695 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet"); 696 } 697 PetscFunctionReturn(0); 698 } 699 700 /* 701 Creates the hash table, and sets the table 702 This table is created only once. 703 If new entried need to be added to the matrix 704 then the hash table has to be destroyed and 705 recreated. 706 */ 707 #undef __FUNCT__ 708 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 709 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 710 { 711 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 712 Mat A = baij->A,B=baij->B; 713 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data; 714 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 715 PetscErrorCode ierr; 716 PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 717 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 718 PetscInt *HT,key; 719 MatScalar **HD; 720 PetscReal tmp; 721 #if defined(PETSC_USE_INFO) 722 PetscInt ct=0,max=0; 723 #endif 724 725 PetscFunctionBegin; 726 if (baij->ht) PetscFunctionReturn(0); 727 728 baij->ht_size = (PetscInt)(factor*nz); 729 ht_size = baij->ht_size; 730 731 /* Allocate Memory for Hash Table */ 732 ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr); 733 ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr); 734 ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr); 735 HD = baij->hd; 736 HT = baij->ht; 737 738 /* Loop Over A */ 739 for (i=0; i<a->mbs; i++) { 740 for (j=ai[i]; j<ai[i+1]; j++) { 741 row = i+rstart; 742 col = aj[j]+cstart; 743 744 key = row*Nbs + col + 1; 745 h1 = HASH(ht_size,key,tmp); 746 for (k=0; k<ht_size; k++) { 747 if (!HT[(h1+k)%ht_size]) { 748 HT[(h1+k)%ht_size] = key; 749 HD[(h1+k)%ht_size] = a->a + j*bs2; 750 break; 751 #if defined(PETSC_USE_INFO) 752 } else { 753 ct++; 754 #endif 755 } 756 } 757 #if defined(PETSC_USE_INFO) 758 if (k> max) max = k; 759 #endif 760 } 761 } 762 /* Loop Over B */ 763 for (i=0; i<b->mbs; i++) { 764 for (j=bi[i]; j<bi[i+1]; j++) { 765 row = i+rstart; 766 col = garray[bj[j]]; 767 key = row*Nbs + col + 1; 768 h1 = HASH(ht_size,key,tmp); 769 for (k=0; k<ht_size; k++) { 770 if (!HT[(h1+k)%ht_size]) { 771 HT[(h1+k)%ht_size] = key; 772 HD[(h1+k)%ht_size] = b->a + j*bs2; 773 break; 774 #if defined(PETSC_USE_INFO) 775 } else { 776 ct++; 777 #endif 778 } 779 } 780 #if defined(PETSC_USE_INFO) 781 if (k> max) max = k; 782 #endif 783 } 784 } 785 786 /* Print Summary */ 787 #if defined(PETSC_USE_INFO) 788 for (i=0,j=0; i<ht_size; 
i++) { 789 if (HT[i]) j++; 790 } 791 ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 792 #endif 793 PetscFunctionReturn(0); 794 } 795 796 #undef __FUNCT__ 797 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 798 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 799 { 800 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 801 PetscErrorCode ierr; 802 PetscInt nstash,reallocs; 803 InsertMode addv; 804 805 PetscFunctionBegin; 806 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 807 808 /* make sure all processors are either in INSERTMODE or ADDMODE */ 809 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 810 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 811 mat->insertmode = addv; /* in case this processor had no cache */ 812 813 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 814 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 815 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 816 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 817 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 818 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 819 PetscFunctionReturn(0); 820 } 821 822 #undef __FUNCT__ 823 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 824 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 825 { 826 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 827 Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)baij->A->data; 828 PetscErrorCode ierr; 829 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 830 PetscInt *row,*col; 831 PetscBool r1,r2,r3,other_disassembled; 832 MatScalar *val; 833 InsertMode addv = mat->insertmode; 834 PetscMPIInt n; 835 836 PetscFunctionBegin; 837 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 838 if (!baij->donotstash && !mat->nooffprocentries) { 839 while (1) { 840 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 841 if (!flg) break; 842 843 for (i=0; i<n;) { 844 /* Now identify the consecutive vals belonging to the same row */ 845 for (j=i,rstart=row[j]; j<n; j++) { 846 if (row[j] != rstart) break; 847 } 848 if (j < n) ncols = j-i; 849 else ncols = n-i; 850 /* Now assemble all these values with a single function call */ 851 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 852 i = j; 853 } 854 } 855 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 856 /* Now process the block-stash. 
Since the values are stashed column-oriented, 857 set the roworiented flag to column oriented, and after MatSetValues() 858 restore the original flags */ 859 r1 = baij->roworiented; 860 r2 = a->roworiented; 861 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 862 863 baij->roworiented = PETSC_FALSE; 864 a->roworiented = PETSC_FALSE; 865 866 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 867 while (1) { 868 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 869 if (!flg) break; 870 871 for (i=0; i<n;) { 872 /* Now identify the consecutive vals belonging to the same row */ 873 for (j=i,rstart=row[j]; j<n; j++) { 874 if (row[j] != rstart) break; 875 } 876 if (j < n) ncols = j-i; 877 else ncols = n-i; 878 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 879 i = j; 880 } 881 } 882 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 883 884 baij->roworiented = r1; 885 a->roworiented = r2; 886 887 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 888 } 889 890 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 891 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 892 893 /* determine if any processor has disassembled, if so we must 894 also disassemble ourselfs, in order that we may reassemble. */ 895 /* 896 if nonzero structure of submatrix B cannot change then we know that 897 no processor disassembled thus we can skip this stuff 898 */ 899 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 900 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 901 if (mat->was_assembled && !other_disassembled) { 902 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 903 } 904 } 905 906 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 907 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 908 } 909 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 910 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 911 912 #if defined(PETSC_USE_INFO) 913 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 914 ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 915 916 baij->ht_total_ct = 0; 917 baij->ht_insert_ct = 0; 918 } 919 #endif 920 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 921 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 922 923 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 924 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 925 } 926 927 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 928 929 baij->rowvalues = 0; 930 PetscFunctionReturn(0); 931 } 932 933 #include <petscdraw.h> 934 #undef __FUNCT__ 935 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 936 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 937 { 938 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 939 PetscErrorCode ierr; 940 PetscMPIInt size = baij->size,rank = baij->rank; 941 PetscInt bs = mat->rmap->bs; 942 PetscBool iascii,isdraw; 943 PetscViewer sviewer; 944 PetscViewerFormat format; 945 946 PetscFunctionBegin; 947 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 948 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 949 if (iascii) { 950 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 951 if (format == 
PETSC_VIEWER_ASCII_INFO_DETAIL) { 952 MatInfo info; 953 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 954 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 955 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 956 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 957 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 958 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 959 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 960 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 961 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 962 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 963 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 964 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 965 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 966 PetscFunctionReturn(0); 967 } else if (format == PETSC_VIEWER_ASCII_INFO) { 968 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 969 PetscFunctionReturn(0); 970 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 971 PetscFunctionReturn(0); 972 } 973 } 974 975 if (isdraw) { 976 PetscDraw draw; 977 PetscBool isnull; 978 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 979 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 980 } 981 982 if (size == 1) { 983 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 984 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 985 } else { 986 /* assemble the entire matrix onto first processor. */ 987 Mat A; 988 Mat_SeqBAIJ *Aloc; 989 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 990 MatScalar *a; 991 992 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 993 /* Perhaps this should be the type of mat? 
*/ 994 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 995 if (!rank) { 996 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 997 } else { 998 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 999 } 1000 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1001 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 1002 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1003 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1004 1005 /* copy over the A part */ 1006 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1007 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1008 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1009 1010 for (i=0; i<mbs; i++) { 1011 rvals[0] = bs*(baij->rstartbs + i); 1012 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1013 for (j=ai[i]; j<ai[i+1]; j++) { 1014 col = (baij->cstartbs+aj[j])*bs; 1015 for (k=0; k<bs; k++) { 1016 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1017 col++; a += bs; 1018 } 1019 } 1020 } 1021 /* copy over the B part */ 1022 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1023 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1024 for (i=0; i<mbs; i++) { 1025 rvals[0] = bs*(baij->rstartbs + i); 1026 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1027 for (j=ai[i]; j<ai[i+1]; j++) { 1028 col = baij->garray[aj[j]]*bs; 1029 for (k=0; k<bs; k++) { 1030 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1031 col++; a += bs; 1032 } 1033 } 1034 } 1035 ierr = PetscFree(rvals);CHKERRQ(ierr); 1036 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1037 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1038 /* 1039 Everyone has to call to draw the matrix since the graphics waits are 1040 synchronized across all processors that share the PetscDraw object 1041 */ 1042 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1043 if (!rank) { 1044 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1045 /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/ 1046 PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ); 1047 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1048 } 1049 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1050 ierr = MatDestroy(&A);CHKERRQ(ierr); 1051 } 1052 PetscFunctionReturn(0); 1053 } 1054 1055 #undef __FUNCT__ 1056 #define __FUNCT__ "MatView_MPIBAIJ_Binary" 1057 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1058 { 1059 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data; 1060 Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)a->A->data; 1061 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)a->B->data; 1062 PetscErrorCode ierr; 1063 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 1064 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 1065 int fd; 1066 PetscScalar *column_values; 1067 FILE *file; 1068 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1069 PetscInt message_count,flowcontrolcount; 1070 1071 PetscFunctionBegin; 1072 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1073 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1074 nz = bs2*(A->nz + B->nz); 1075 rlen = mat->rmap->n; 1076 if (!rank) { 1077 header[0] = MAT_FILE_CLASSID; 1078 
header[1] = mat->rmap->N; 1079 header[2] = mat->cmap->N; 1080 1081 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1082 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1083 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1084 /* get largest number of rows any processor has */ 1085 range = mat->rmap->range; 1086 for (i=1; i<size; i++) { 1087 rlen = PetscMax(rlen,range[i+1] - range[i]); 1088 } 1089 } else { 1090 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1091 } 1092 1093 ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr); 1094 /* compute lengths of each row */ 1095 for (i=0; i<a->mbs; i++) { 1096 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1097 } 1098 /* store the row lengths to the file */ 1099 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1100 if (!rank) { 1101 MPI_Status status; 1102 ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr); 1103 rlen = (range[1] - range[0])/bs; 1104 for (i=0; i<rlen; i++) { 1105 for (j=0; j<bs; j++) { 1106 row_lens[i*bs+j] = bs*crow_lens[i]; 1107 } 1108 } 1109 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1110 for (i=1; i<size; i++) { 1111 rlen = (range[i+1] - range[i])/bs; 1112 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1113 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1114 for (k=0; k<rlen; k++) { 1115 for (j=0; j<bs; j++) { 1116 row_lens[k*bs+j] = bs*crow_lens[k]; 1117 } 1118 } 1119 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1120 } 1121 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1122 ierr = PetscFree(row_lens);CHKERRQ(ierr); 1123 } else { 1124 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1125 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1126 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1127 } 1128 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 1129 1130 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 1131 information needed to make it for each row from a block row. 
This does require more communication but still not more than 1132 the communication needed for the nonzero values */ 1133 nzmax = nz; /* space a largest processor needs */ 1134 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1135 ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1136 cnt = 0; 1137 for (i=0; i<a->mbs; i++) { 1138 pcnt = cnt; 1139 for (j=B->i[i]; j<B->i[i+1]; j++) { 1140 if ((col = garray[B->j[j]]) > cstart) break; 1141 for (l=0; l<bs; l++) { 1142 column_indices[cnt++] = bs*col+l; 1143 } 1144 } 1145 for (k=A->i[i]; k<A->i[i+1]; k++) { 1146 for (l=0; l<bs; l++) { 1147 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1148 } 1149 } 1150 for (; j<B->i[i+1]; j++) { 1151 for (l=0; l<bs; l++) { 1152 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1153 } 1154 } 1155 len = cnt - pcnt; 1156 for (k=1; k<bs; k++) { 1157 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1158 cnt += len; 1159 } 1160 } 1161 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1162 1163 /* store the columns to the file */ 1164 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1165 if (!rank) { 1166 MPI_Status status; 1167 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1168 for (i=1; i<size; i++) { 1169 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1170 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1171 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1172 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1173 } 1174 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1175 } else { 1176 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1177 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1178 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1179 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1180 } 1181 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1182 1183 /* load up the numerical values */ 1184 ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1185 cnt = 0; 1186 for (i=0; i<a->mbs; i++) { 1187 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1188 for (j=B->i[i]; j<B->i[i+1]; j++) { 1189 if (garray[B->j[j]] > cstart) break; 1190 for (l=0; l<bs; l++) { 1191 for (ll=0; ll<bs; ll++) { 1192 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1193 } 1194 } 1195 cnt += bs; 1196 } 1197 for (k=A->i[i]; k<A->i[i+1]; k++) { 1198 for (l=0; l<bs; l++) { 1199 for (ll=0; ll<bs; ll++) { 1200 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1201 } 1202 } 1203 cnt += bs; 1204 } 1205 for (; j<B->i[i+1]; j++) { 1206 for (l=0; l<bs; l++) { 1207 for (ll=0; ll<bs; ll++) { 1208 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1209 } 1210 } 1211 cnt += bs; 1212 } 1213 cnt += (bs-1)*rlen; 1214 } 1215 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1216 1217 /* store the column values to the file */ 1218 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1219 if (!rank) { 
1220 MPI_Status status; 1221 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1222 for (i=1; i<size; i++) { 1223 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1224 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1225 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1226 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1227 } 1228 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1229 } else { 1230 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1231 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1232 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1233 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1234 } 1235 ierr = PetscFree(column_values);CHKERRQ(ierr); 1236 1237 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1238 if (file) { 1239 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1240 } 1241 PetscFunctionReturn(0); 1242 } 1243 1244 #undef __FUNCT__ 1245 #define __FUNCT__ "MatView_MPIBAIJ" 1246 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1247 { 1248 PetscErrorCode ierr; 1249 PetscBool iascii,isdraw,issocket,isbinary; 1250 1251 PetscFunctionBegin; 1252 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1253 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1254 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1255 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1256 if (iascii || isdraw || issocket) { 1257 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1258 } else if (isbinary) { 1259 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1260 } 1261 PetscFunctionReturn(0); 1262 } 1263 1264 #undef __FUNCT__ 1265 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1266 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1267 { 1268 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1269 PetscErrorCode ierr; 1270 1271 PetscFunctionBegin; 1272 #if defined(PETSC_USE_LOG) 1273 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1274 #endif 1275 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1276 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1277 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1278 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1279 #if defined(PETSC_USE_CTABLE) 1280 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1281 #else 1282 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1283 #endif 1284 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1285 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1286 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1287 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1288 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1289 ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1290 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1291 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1292 1293 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1294 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1295 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1296 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1297 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1298 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr); 1303 PetscFunctionReturn(0); 1304 } 1305 1306 #undef __FUNCT__ 1307 #define __FUNCT__ "MatMult_MPIBAIJ" 1308 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1309 { 1310 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1311 PetscErrorCode ierr; 1312 PetscInt nt; 1313 1314 PetscFunctionBegin; 1315 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1316 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1317 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1318 if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1319 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1320 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1321 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1322 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1323 PetscFunctionReturn(0); 1324 } 1325 1326 #undef __FUNCT__ 1327 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1328 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1329 { 1330 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1331 PetscErrorCode ierr; 1332 1333 PetscFunctionBegin; 1334 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1335 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1336 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1337 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1338 PetscFunctionReturn(0); 1339 } 1340 1341 #undef __FUNCT__ 1342 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1343 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1344 { 1345 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1346 PetscErrorCode ierr; 1347 PetscBool merged; 1348 1349 PetscFunctionBegin; 1350 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1351 /* do nondiagonal part */ 1352 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1353 if (!merged) { 1354 /* send it on its way */ 1355 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1356 /* do local part */ 1357 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1358 /* receive remote parts: note this assumes the values are not actually */ 1359 /* inserted in yy until the next line */ 1360 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1361 } else { 1362 /* do local part */ 1363 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1364 /* send it on its way */ 1365 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1366 /* values actually were received in the Begin() 
but we need to call this nop */ 1367 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1368 } 1369 PetscFunctionReturn(0); 1370 } 1371 1372 #undef __FUNCT__ 1373 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1374 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1375 { 1376 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1377 PetscErrorCode ierr; 1378 1379 PetscFunctionBegin; 1380 /* do nondiagonal part */ 1381 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1382 /* send it on its way */ 1383 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1384 /* do local part */ 1385 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1386 /* receive remote parts: note this assumes the values are not actually */ 1387 /* inserted in yy until the next line, which is true for my implementation*/ 1388 /* but is not perhaps always true. */ 1389 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1390 PetscFunctionReturn(0); 1391 } 1392 1393 /* 1394 This only works correctly for square matrices where the subblock A->A is the 1395 diagonal block 1396 */ 1397 #undef __FUNCT__ 1398 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1399 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1400 { 1401 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1402 PetscErrorCode ierr; 1403 1404 PetscFunctionBegin; 1405 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1406 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1407 PetscFunctionReturn(0); 1408 } 1409 1410 #undef __FUNCT__ 1411 #define __FUNCT__ "MatScale_MPIBAIJ" 1412 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1413 { 1414 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1415 PetscErrorCode ierr; 1416 1417 PetscFunctionBegin; 1418 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1419 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1420 PetscFunctionReturn(0); 1421 } 1422 1423 #undef __FUNCT__ 1424 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1425 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1426 { 1427 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1428 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1429 PetscErrorCode ierr; 1430 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1431 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1432 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1433 1434 PetscFunctionBegin; 1435 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1436 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1437 mat->getrowactive = PETSC_TRUE; 1438 1439 if (!mat->rowvalues && (idx || v)) { 1440 /* 1441 allocate enough space to hold information from the longest row. 
1442 */ 1443 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1444 PetscInt max = 1,mbs = mat->mbs,tmp; 1445 for (i=0; i<mbs; i++) { 1446 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1447 if (max < tmp) max = tmp; 1448 } 1449 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1450 } 1451 lrow = row - brstart; 1452 1453 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1454 if (!v) {pvA = 0; pvB = 0;} 1455 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1456 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1457 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1458 nztot = nzA + nzB; 1459 1460 cmap = mat->garray; 1461 if (v || idx) { 1462 if (nztot) { 1463 /* Sort by increasing column numbers, assuming A and B already sorted */ 1464 PetscInt imark = -1; 1465 if (v) { 1466 *v = v_p = mat->rowvalues; 1467 for (i=0; i<nzB; i++) { 1468 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1469 else break; 1470 } 1471 imark = i; 1472 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1473 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1474 } 1475 if (idx) { 1476 *idx = idx_p = mat->rowindices; 1477 if (imark > -1) { 1478 for (i=0; i<imark; i++) { 1479 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1480 } 1481 } else { 1482 for (i=0; i<nzB; i++) { 1483 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1484 else break; 1485 } 1486 imark = i; 1487 } 1488 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1489 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1490 } 1491 } else { 1492 if (idx) *idx = 0; 1493 if (v) *v = 0; 1494 } 1495 } 1496 *nz = nztot; 1497 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1498 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1499 PetscFunctionReturn(0); 1500 } 1501 1502 #undef __FUNCT__ 1503 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1504 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1505 { 1506 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1507 1508 PetscFunctionBegin; 1509 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1510 baij->getrowactive = PETSC_FALSE; 1511 PetscFunctionReturn(0); 1512 } 1513 1514 #undef __FUNCT__ 1515 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1516 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1517 { 1518 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1519 PetscErrorCode ierr; 1520 1521 PetscFunctionBegin; 1522 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1523 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1524 PetscFunctionReturn(0); 1525 } 1526 1527 #undef __FUNCT__ 1528 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1529 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1530 { 1531 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1532 Mat A = a->A,B = a->B; 1533 PetscErrorCode ierr; 1534 PetscReal isend[5],irecv[5]; 1535 1536 PetscFunctionBegin; 1537 info->block_size = (PetscReal)matin->rmap->bs; 1538 1539 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1540 1541 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1542 isend[3] = info->memory; isend[4] = info->mallocs; 1543 1544 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1545 1546 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1547 
isend[3] += info->memory; isend[4] += info->mallocs; 1548 1549 if (flag == MAT_LOCAL) { 1550 info->nz_used = isend[0]; 1551 info->nz_allocated = isend[1]; 1552 info->nz_unneeded = isend[2]; 1553 info->memory = isend[3]; 1554 info->mallocs = isend[4]; 1555 } else if (flag == MAT_GLOBAL_MAX) { 1556 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1557 1558 info->nz_used = irecv[0]; 1559 info->nz_allocated = irecv[1]; 1560 info->nz_unneeded = irecv[2]; 1561 info->memory = irecv[3]; 1562 info->mallocs = irecv[4]; 1563 } else if (flag == MAT_GLOBAL_SUM) { 1564 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1565 1566 info->nz_used = irecv[0]; 1567 info->nz_allocated = irecv[1]; 1568 info->nz_unneeded = irecv[2]; 1569 info->memory = irecv[3]; 1570 info->mallocs = irecv[4]; 1571 } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1572 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1573 info->fill_ratio_needed = 0; 1574 info->factor_mallocs = 0; 1575 PetscFunctionReturn(0); 1576 } 1577 1578 #undef __FUNCT__ 1579 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1580 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1581 { 1582 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1583 PetscErrorCode ierr; 1584 1585 PetscFunctionBegin; 1586 switch (op) { 1587 case MAT_NEW_NONZERO_LOCATIONS: 1588 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1589 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1590 case MAT_KEEP_NONZERO_PATTERN: 1591 case MAT_NEW_NONZERO_LOCATION_ERR: 1592 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1593 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1594 break; 1595 case MAT_ROW_ORIENTED: 1596 a->roworiented = flg; 1597 1598 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1599 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1600 break; 1601 case MAT_NEW_DIAGONALS: 1602 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1603 break; 1604 case MAT_IGNORE_OFF_PROC_ENTRIES: 1605 a->donotstash = flg; 1606 break; 1607 case MAT_USE_HASH_TABLE: 1608 a->ht_flag = flg; 1609 break; 1610 case MAT_SYMMETRIC: 1611 case MAT_STRUCTURALLY_SYMMETRIC: 1612 case MAT_HERMITIAN: 1613 case MAT_SYMMETRY_ETERNAL: 1614 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1615 break; 1616 default: 1617 SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op); 1618 } 1619 PetscFunctionReturn(0); 1620 } 1621 1622 #undef __FUNCT__ 1623 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1624 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1625 { 1626 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1627 Mat_SeqBAIJ *Aloc; 1628 Mat B; 1629 PetscErrorCode ierr; 1630 PetscInt M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1631 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1632 MatScalar *a; 1633 1634 PetscFunctionBegin; 1635 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1636 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1637 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1638 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1639 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1640 /* Do not know preallocation information, but must set block size */ 1641 ierr = 
MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr); 1642 } else { 1643 B = *matout; 1644 } 1645 1646 /* copy over the A part */ 1647 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1648 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1649 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1650 1651 for (i=0; i<mbs; i++) { 1652 rvals[0] = bs*(baij->rstartbs + i); 1653 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1654 for (j=ai[i]; j<ai[i+1]; j++) { 1655 col = (baij->cstartbs+aj[j])*bs; 1656 for (k=0; k<bs; k++) { 1657 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1658 1659 col++; a += bs; 1660 } 1661 } 1662 } 1663 /* copy over the B part */ 1664 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1665 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1666 for (i=0; i<mbs; i++) { 1667 rvals[0] = bs*(baij->rstartbs + i); 1668 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1669 for (j=ai[i]; j<ai[i+1]; j++) { 1670 col = baij->garray[aj[j]]*bs; 1671 for (k=0; k<bs; k++) { 1672 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1673 col++; 1674 a += bs; 1675 } 1676 } 1677 } 1678 ierr = PetscFree(rvals);CHKERRQ(ierr); 1679 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1680 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1681 1682 if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B; 1683 else { 1684 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1685 } 1686 PetscFunctionReturn(0); 1687 } 1688 1689 #undef __FUNCT__ 1690 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1691 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1692 { 1693 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1694 Mat a = baij->A,b = baij->B; 1695 PetscErrorCode ierr; 1696 PetscInt s1,s2,s3; 1697 1698 PetscFunctionBegin; 1699 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1700 if (rr) { 1701 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1702 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1703 /* Overlap communication with computation. 
*/ 1704 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1705 } 1706 if (ll) { 1707 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1708 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1709 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1710 } 1711 /* scale the diagonal block */ 1712 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1713 1714 if (rr) { 1715 /* Do a scatter end and then right scale the off-diagonal block */ 1716 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1717 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1718 } 1719 PetscFunctionReturn(0); 1720 } 1721 1722 #undef __FUNCT__ 1723 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1724 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1725 { 1726 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1727 PetscErrorCode ierr; 1728 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1729 PetscInt i,*owners = A->rmap->range; 1730 PetscInt *nprocs,j,idx,nsends,row; 1731 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1732 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1733 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap->rstart; 1734 MPI_Comm comm; 1735 MPI_Request *send_waits,*recv_waits; 1736 MPI_Status recv_status,*send_status; 1737 const PetscScalar *xx; 1738 PetscScalar *bb; 1739 #if defined(PETSC_DEBUG) 1740 PetscBool found = PETSC_FALSE; 1741 #endif 1742 1743 PetscFunctionBegin; 1744 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1745 /* first count number of contributors to each processor */ 1746 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1747 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1748 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1749 j = 0; 1750 for (i=0; i<N; i++) { 1751 if (lastidx > (idx = rows[i])) j = 0; 1752 lastidx = idx; 1753 for (; j<size; j++) { 1754 if (idx >= owners[j] && idx < owners[j+1]) { 1755 nprocs[2*j]++; 1756 nprocs[2*j+1] = 1; 1757 owner[i] = j; 1758 #if defined(PETSC_DEBUG) 1759 found = PETSC_TRUE; 1760 #endif 1761 break; 1762 } 1763 } 1764 #if defined(PETSC_DEBUG) 1765 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1766 found = PETSC_FALSE; 1767 #endif 1768 } 1769 nsends = 0; for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 1770 1771 if (A->nooffproczerorows) { 1772 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 1773 nrecvs = nsends; 1774 nmax = N; 1775 } else { 1776 /* inform other processors of number of messages and max length*/ 1777 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1778 } 1779 1780 /* post receives: */ 1781 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1782 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1783 for (i=0; i<nrecvs; i++) { 1784 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1785 } 1786 1787 /* do sends: 1788 1) starts[i] gives the starting index in svalues for stuff going to 1789 the ith processor 1790 */ 1791 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1792 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 
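  /*
     A sketch of how starts[] is used below (illustrative values, not from a run):
     it is filled twice. The first pass sets starts[i] to the offset in svalues of the
     block destined for process i and is then advanced entry by entry while the rows
     are packed, so afterwards starts[i] points at the end of block i; the second pass
     rebuilds the true offsets from nprocs[] for the MPI_Isend() calls. For example,
     with size=2 and nprocs[] = {2,1, 3,1} the first pass gives starts = {0,2},
     packing leaves starts = {2,5}, and the rebuild yields starts = {0,2,5}, so
     rank 0's rows occupy svalues[0..1] and rank 1's svalues[2..4].
  */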
1793 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1794 starts[0] = 0; 1795 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 1796 for (i=0; i<N; i++) { 1797 svalues[starts[owner[i]]++] = rows[i]; 1798 } 1799 1800 starts[0] = 0; 1801 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 1802 count = 0; 1803 for (i=0; i<size; i++) { 1804 if (nprocs[2*i+1]) { 1805 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1806 } 1807 } 1808 ierr = PetscFree(starts);CHKERRQ(ierr); 1809 1810 base = owners[rank]; 1811 1812 /* wait on receives */ 1813 ierr = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr); 1814 count = nrecvs; 1815 slen = 0; 1816 while (count) { 1817 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1818 /* unpack receives into our local space */ 1819 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1820 1821 source[imdex] = recv_status.MPI_SOURCE; 1822 lens[imdex] = n; 1823 slen += n; 1824 count--; 1825 } 1826 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1827 1828 /* move the data into the send scatter */ 1829 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1830 count = 0; 1831 for (i=0; i<nrecvs; i++) { 1832 values = rvalues + i*nmax; 1833 for (j=0; j<lens[i]; j++) { 1834 lrows[count++] = values[j] - base; 1835 } 1836 } 1837 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1838 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1839 ierr = PetscFree(owner);CHKERRQ(ierr); 1840 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1841 1842 /* fix right hand side if needed */ 1843 if (x && b) { 1844 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1845 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1846 for (i=0; i<slen; i++) { 1847 bb[lrows[i]] = diag*xx[lrows[i]]; 1848 } 1849 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1850 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1851 } 1852 1853 /* actually zap the local rows */ 1854 /* 1855 Zero the required rows. If the "diagonal block" of the matrix 1856 is square and the user wishes to set the diagonal we use separate 1857 code so that MatSetValues() is not called for each diagonal allocating 1858 new memory, thus calling lots of mallocs and slowing things down. 
1859 1860 */ 1861 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1862 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1863 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1864 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 1865 } else if (diag != 0.0) { 1866 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1867 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1868 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1869 for (i=0; i<slen; i++) { 1870 row = lrows[i] + rstart_bs; 1871 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1872 } 1873 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1874 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1875 } else { 1876 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1877 } 1878 1879 ierr = PetscFree(lrows);CHKERRQ(ierr); 1880 1881 /* wait on sends */ 1882 if (nsends) { 1883 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1884 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1885 ierr = PetscFree(send_status);CHKERRQ(ierr); 1886 } 1887 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1888 ierr = PetscFree(svalues);CHKERRQ(ierr); 1889 PetscFunctionReturn(0); 1890 } 1891 1892 #undef __FUNCT__ 1893 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1894 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1895 { 1896 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1897 PetscErrorCode ierr; 1898 1899 PetscFunctionBegin; 1900 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1901 PetscFunctionReturn(0); 1902 } 1903 1904 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*); 1905 1906 #undef __FUNCT__ 1907 #define __FUNCT__ "MatEqual_MPIBAIJ" 1908 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1909 { 1910 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1911 Mat a,b,c,d; 1912 PetscBool flg; 1913 PetscErrorCode ierr; 1914 1915 PetscFunctionBegin; 1916 a = matA->A; b = matA->B; 1917 c = matB->A; d = matB->B; 1918 1919 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1920 if (flg) { 1921 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1922 } 1923 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1924 PetscFunctionReturn(0); 1925 } 1926 1927 #undef __FUNCT__ 1928 #define __FUNCT__ "MatCopy_MPIBAIJ" 1929 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1930 { 1931 PetscErrorCode ierr; 1932 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1933 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 1934 1935 PetscFunctionBegin; 1936 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 1937 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1938 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1939 } else { 1940 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1941 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1942 } 1943 PetscFunctionReturn(0); 1944 } 1945 1946 #undef __FUNCT__ 1947 #define __FUNCT__ "MatSetUp_MPIBAIJ" 1948 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1949 { 1950 PetscErrorCode ierr; 1951 1952 PetscFunctionBegin; 1953 ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1954 PetscFunctionReturn(0); 1955 } 1956 1957 #undef __FUNCT__ 1958 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1959 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1960 { 1961 PetscErrorCode ierr; 1962 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data; 1963 PetscBLASInt bnz,one=1; 1964 Mat_SeqBAIJ *x,*y; 1965 1966 PetscFunctionBegin; 1967 if (str == SAME_NONZERO_PATTERN) { 1968 PetscScalar alpha = a; 1969 x = (Mat_SeqBAIJ*)xx->A->data; 1970 y = (Mat_SeqBAIJ*)yy->A->data; 1971 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 1972 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1973 x = (Mat_SeqBAIJ*)xx->B->data; 1974 y = (Mat_SeqBAIJ*)yy->B->data; 1975 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 1976 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1977 } else { 1978 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1979 } 1980 PetscFunctionReturn(0); 1981 } 1982 1983 #undef __FUNCT__ 1984 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1985 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1986 { 1987 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1988 PetscErrorCode ierr; 1989 1990 PetscFunctionBegin; 1991 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1992 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1993 PetscFunctionReturn(0); 1994 } 1995 1996 #undef __FUNCT__ 1997 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 1998 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1999 { 2000 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2001 PetscErrorCode ierr; 2002 2003 PetscFunctionBegin; 2004 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2005 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2006 PetscFunctionReturn(0); 2007 } 2008 2009 #undef __FUNCT__ 2010 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 2011 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2012 { 2013 PetscErrorCode ierr; 2014 IS iscol_local; 2015 PetscInt csize; 2016 2017 PetscFunctionBegin; 2018 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2019 if (call == MAT_REUSE_MATRIX) { 2020 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2021 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2022 } else { 2023 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2024 } 2025 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2026 if (call == MAT_INITIAL_MATRIX) { 2027 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2028 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 2029 } 2030 PetscFunctionReturn(0); 2031 } 2032 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*); 2033 #undef __FUNCT__ 2034 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 2035 /* 2036 Not great since it makes two copies of the 
submatrix: first as a sequential (SeqBAIJ) matrix in local storage, and then again when
    the local pieces are concatenated into the final result.
    Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
*/
PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
  Mat            M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm;
  IS             isrow_new, iscol_new;
  PetscBool      idflag,allrows, allcols;
  Mat_SeqBAIJ    *aij;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* The compression and expansion should be avoided. It does not report
     errors and may change the indices, hence it is buggy */
  ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
  ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);

  /* Check for special case: each processor gets entire matrix columns */
  ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
  if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE;
  else allcols = PETSC_FALSE;

  ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
  if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE;
  else allrows = PETSC_FALSE;

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
  } else {
    ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
  }
  ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
  /*
    m - number of local rows
    n - number of columns (same on all processors)
    rstart - first row in new global matrix generated
  */
  ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  m    = m/bs;
  n    = n/bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
      Determine the number of non-zeros in the diagonal and off-diagonal
      portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n*bs) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize/bs;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
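    /*
       Each local block row of the result is scanned once: block columns inside the
       owned range [rstart,rend) are counted in dlens[] (the diagonal part) and the
       remainder in olens[] (the off-diagonal part); these counts become the
       d_nnz/o_nnz arguments of MatMPIBAIJSetPreallocation() below.
    */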
    ierr = PetscMalloc2(m+1,PetscInt,&dlens,m+1,PetscInt,&olens);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
      The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
      rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart/bs + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz*bs*bs;
    ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIBAIJ"
PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,rlocal_size,clocal_size,nrows;
  const PetscInt *rows;
  PetscMPIInt    size;
  IS             crowp,growp,irowp,lrowp,lcolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
  ierr = ISSetPermutation(growp);CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(&crowp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  ierr = ISDestroy(&growp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&rlocal_size,&clocal_size);CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
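  /* rows[] now holds the complete inverse of the global row permutation; only the
     slice rows[first] .. rows[first+rlocal_size-1], the entries for this process's
     own rows, is needed here, and it is wrapped in a sequential IS that drives the
     submatrix extraction below */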
  ierr = ISCreateGeneral(MPI_COMM_SELF,rlocal_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(&irowp);CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
  if (size==1) {
    lcolp = colp;
  } else {
    ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  if (size>1) {
    ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
  }
  /* clean up */
  ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetGhosts_MPIBAIJ"
PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;

  PetscFunctionBegin;
  if (nghosts) *nghosts = B->nbs;
  if (ghosts) *ghosts = baij->garray;
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
/*
    This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
*/
PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
{
  Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode         ierr;
  PetscMPIInt            size,*ncolsonproc,*disp,nn;
  PetscInt               bs,i,n,nrows,j,k,m,ncols,col;
  const PetscInt         *is,*rows = 0,*A_ci,*A_cj,*B_ci,*B_cj,*ltog;
  PetscInt               nis = iscoloring->n,nctot,*cols;
  PetscInt               *rowhit,M,cstart,cend,colb;
  PetscInt               *columnsforrow,l;
  IS                     *isa;
  PetscBool              done,flg;
  ISLocalToGlobalMapping map = mat->cmap->bmapping;
  PetscInt               ctype=c->ctype;

  PetscFunctionBegin;
  if (!mat->assembled) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
  if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");

  if (map) {ierr = ISLocalToGlobalMappingGetIndices(map,&ltog);CHKERRQ(ierr);}
  else ltog = NULL;
  ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
  ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);

  M         = mat->rmap->n/bs;
  cstart    = mat->cmap->rstart/bs;
  cend      = mat->cmap->rend/bs;
  c->M      = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
  c->N      = mat->cmap->N/bs;
  c->m      = mat->rmap->n/bs;
  c->rstart = mat->rmap->rstart/bs;

  c->ncolors = nis;
  ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
  ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
  ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
  ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
  ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
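  /* The arrays just allocated are filled color by color in the loop below:
     c->columns[i] receives the columns of color i owned by this process,
     c->rows[i] the local rows touched by any column of that color, and
     c->columnsforrow[i][k] the column of the color that perturbs row c->rows[i][k] */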

  /* Allow access to data structures of local part of matrix */
  if (!baij->colmap) {
    ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
  }
  ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
  ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);

  ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
  ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);

  for (i=0; i<nis; i++) {
    ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
    ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);

    c->ncolumns[i] = n;
    if (n) {
      ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
      ierr = PetscLogObjectMemory((PetscObject)c,n*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
    } else {
      c->columns[i] = 0;
    }

    if (ctype == IS_COLORING_GLOBAL) {
      /* Determine the total (parallel) number of columns of this color */
      ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
      ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);

      ierr  = PetscMPIIntCast(n,&nn);CHKERRQ(ierr);
      ierr  = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      nctot = 0; for (j=0; j<size; j++) nctot += ncolsonproc[j];
      if (!nctot) {
        ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
      }

      disp[0] = 0;
      for (j=1; j<size; j++) {
        disp[j] = disp[j-1] + ncolsonproc[j-1];
      }

      /* Get complete list of columns for color on each processor */
      ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
      ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
    } else if (ctype == IS_COLORING_GHOSTED) {
      /* Determine local number of columns of this color on this process, including ghost points */
      nctot = n;
      ierr  = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
      ierr  = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");

    /*
       Mark all rows affected by these columns
    */
    /* Temporary option to allow for debugging/testing */
    flg  = PETSC_FALSE;
    ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr);
    if (!flg) { /*-----------------------------------------------------------------------------*/
      /* crude, fast version */
      ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
      /* loop over columns */
      for (j=0; j<nctot; j++) {
        if (ctype == IS_COLORING_GHOSTED) {
          col = ltog[cols[j]];
        } else {
          col = cols[j];
        }
        if (col >= cstart && col < cend) {
          /* column is in diagonal block of matrix */
          rows = A_cj + A_ci[col-cstart];
          m    = A_ci[col-cstart+1] - A_ci[col-cstart];
        } else {
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
          colb--;
#else
          colb = baij->colmap[col] - 1;
#endif
          if (colb == -1) {
            m
= 0; 2374 } else { 2375 colb = colb/bs; 2376 rows = B_cj + B_ci[colb]; 2377 m = B_ci[colb+1] - B_ci[colb]; 2378 } 2379 } 2380 /* loop over columns marking them in rowhit */ 2381 for (k=0; k<m; k++) { 2382 rowhit[*rows++] = col + 1; 2383 } 2384 } 2385 2386 /* count the number of hits */ 2387 nrows = 0; 2388 for (j=0; j<M; j++) { 2389 if (rowhit[j]) nrows++; 2390 } 2391 c->nrows[i] = nrows; 2392 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2393 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2394 ierr = PetscLogObjectMemory((PetscObject)c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2395 nrows = 0; 2396 for (j=0; j<M; j++) { 2397 if (rowhit[j]) { 2398 c->rows[i][nrows] = j; 2399 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2400 nrows++; 2401 } 2402 } 2403 } else { /*-------------------------------------------------------------------------------*/ 2404 /* slow version, using rowhit as a linked list */ 2405 PetscInt currentcol,fm,mfm; 2406 rowhit[M] = M; 2407 nrows = 0; 2408 /* loop over columns*/ 2409 for (j=0; j<nctot; j++) { 2410 if (ctype == IS_COLORING_GHOSTED) { 2411 col = ltog[cols[j]]; 2412 } else { 2413 col = cols[j]; 2414 } 2415 if (col >= cstart && col < cend) { 2416 /* column is in diagonal block of matrix */ 2417 rows = A_cj + A_ci[col-cstart]; 2418 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2419 } else { 2420 #if defined(PETSC_USE_CTABLE) 2421 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2422 colb--; 2423 #else 2424 colb = baij->colmap[col] - 1; 2425 #endif 2426 if (colb == -1) { 2427 m = 0; 2428 } else { 2429 colb = colb/bs; 2430 rows = B_cj + B_ci[colb]; 2431 m = B_ci[colb+1] - B_ci[colb]; 2432 } 2433 } 2434 2435 /* loop over columns marking them in rowhit */ 2436 fm = M; /* fm points to first entry in linked list */ 2437 for (k=0; k<m; k++) { 2438 currentcol = *rows++; 2439 /* is it already in the list? 
*/
          do {
            mfm = fm;
            fm  = rowhit[fm];
          } while (fm < currentcol);
          /* not in list so add it */
          if (fm != currentcol) {
            nrows++;
            columnsforrow[currentcol] = col;
            /* next three lines insert new entry into linked list */
            rowhit[mfm]        = currentcol;
            rowhit[currentcol] = fm;
            fm                 = currentcol;
            /* fm points to present position in list since we know the columns are sorted */
          } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
        }
      }
      c->nrows[i] = nrows;
      ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
      ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
      ierr = PetscLogObjectMemory((PetscObject)c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
      /* now store the linked list of rows into c->rows[i] */
      nrows = 0;
      fm    = rowhit[M];
      do {
        c->rows[i][nrows]            = fm;
        c->columnsforrow[i][nrows++] = columnsforrow[fm];
        fm                           = rowhit[fm];
      } while (fm < M);
    } /* ---------------------------------------------------------------------------------------*/
    ierr = PetscFree(cols);CHKERRQ(ierr);
  }

  /* Optimize by adding the vscale, and vscaleforrow[][] fields */
  /*
       vscale will contain the "diagonal" (on-process) scalings followed by the off-process ones
  */
  if (ctype == IS_COLORING_GLOBAL) {
    PetscInt *garray;
    ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
    for (i=0; i<baij->B->cmap->n/bs; i++) {
      for (j=0; j<bs; j++) {
        garray[i*bs+j] = bs*baij->garray[i]+j;
      }
    }
    ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
    ierr = PetscFree(garray);CHKERRQ(ierr);
    ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
    for (k=0; k<c->ncolors; k++) {
      ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
      for (l=0; l<c->nrows[k]; l++) {
        col = c->columnsforrow[k][l];
        if (col >= cstart && col < cend) {
          /* column is in diagonal block of matrix */
          colb = col - cstart;
        } else {
          /* column is in "off-processor" part */
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
          colb--;
#else
          colb = baij->colmap[col] - 1;
#endif
          colb  = colb/bs;
          colb += cend - cstart;
        }
        c->vscaleforrow[k][l] = colb;
      }
    }
  } else if (ctype == IS_COLORING_GHOSTED) {
    /* Get gtol mapping */
    PetscInt N = mat->cmap->N,nlocal,*gtol;
    ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
    for (i=0; i<N; i++) gtol[i] = -1;
    ierr = ISLocalToGlobalMappingGetSize(map,&nlocal);CHKERRQ(ierr);
    for (i=0; i<nlocal; i++) gtol[ltog[i]] = i;

    c->vscale = 0; /* will be created in MatFDColoringApply() */
    ierr      = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
    for (k=0; k<c->ncolors; k++) {
      ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
      for (l=0; l<c->nrows[k]; l++) {
        col = c->columnsforrow[k][l];      /* global column index */

        c->vscaleforrow[k][l] = gtol[col]; /* local column index */
      }
    }
    ierr = PetscFree(gtol);CHKERRQ(ierr);
  }
  ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
  ierr = PetscFree(rowhit);CHKERRQ(ierr);
  ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
  ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
  ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
  if (map) {ierr = ISLocalToGlobalMappingRestoreIndices(map,&ltog);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
{
  Mat            B;
  Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
  Mat_SeqAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
  PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
  PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);

  /* ----------------------------------------------------------------
       Tell every processor the number of nonzero blocks per block row
  */
  ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
    lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
  }
  sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
  ierr      = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
  displs    = recvcounts + size;
  for (i=0; i<size; i++) {
    recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
    displs[i]     = A->rmap->range[i]/bs;
  }
#if defined(PETSC_HAVE_MPI_IN_PLACE)
  ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#else
  ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#endif
  /* ---------------------------------------------------------------
       Create the sequential AIJ matrix that will hold the complete block
       nonzero structure (one entry per bs x bs block)
  */
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
  b    = (Mat_SeqAIJ*)B->data;

  /*--------------------------------------------------------------------
      Copy my part of matrix column indices over
  */
  sendcount  = ad->nz + bd->nz;
  jsendbuf   = b->j + b->i[rstarts[rank]/bs];
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend/bs - A->rmap->rstart/bs;
  cnt        = 0;
  for (i=0; i<n; i++) {

    /* put in lower diagonal portion */
    m = bd->i[i+1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j=ad->i[i]; j<ad->i[i+1]; j++) {
      jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
    }

    /* put in upper diagonal
portion */ 2609 while (m-- > 0) { 2610 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2611 } 2612 } 2613 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2614 2615 /*-------------------------------------------------------------------- 2616 Gather all column indices to all processors 2617 */ 2618 for (i=0; i<size; i++) { 2619 recvcounts[i] = 0; 2620 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2621 recvcounts[i] += lens[j]; 2622 } 2623 } 2624 displs[0] = 0; 2625 for (i=1; i<size; i++) { 2626 displs[i] = displs[i-1] + recvcounts[i-1]; 2627 } 2628 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2629 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2630 #else 2631 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2632 #endif 2633 /*-------------------------------------------------------------------- 2634 Assemble the matrix into useable form (note numerical values not yet set) 2635 */ 2636 /* set the b->ilen (length of each row) values */ 2637 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2638 /* set the b->i indices */ 2639 b->i[0] = 0; 2640 for (i=1; i<=A->rmap->N/bs; i++) { 2641 b->i[i] = b->i[i-1] + lens[i-1]; 2642 } 2643 ierr = PetscFree(lens);CHKERRQ(ierr); 2644 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2645 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2646 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2647 2648 if (A->symmetric) { 2649 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2650 } else if (A->hermitian) { 2651 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2652 } else if (A->structurally_symmetric) { 2653 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2654 } 2655 *newmat = B; 2656 PetscFunctionReturn(0); 2657 } 2658 2659 #undef __FUNCT__ 2660 #define __FUNCT__ "MatSOR_MPIBAIJ" 2661 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2662 { 2663 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2664 PetscErrorCode ierr; 2665 Vec bb1 = 0; 2666 2667 PetscFunctionBegin; 2668 if (flag == SOR_APPLY_UPPER) { 2669 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2670 PetscFunctionReturn(0); 2671 } 2672 2673 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2674 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2675 } 2676 2677 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2678 if (flag & SOR_ZERO_INITIAL_GUESS) { 2679 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2680 its--; 2681 } 2682 2683 while (its--) { 2684 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2685 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2686 2687 /* update rhs: bb1 = bb - B*x */ 2688 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2689 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2690 2691 /* local sweep */ 2692 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2693 } 2694 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2695 if (flag & SOR_ZERO_INITIAL_GUESS) { 2696 ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2697 its--; 2698 } 2699 while (its--) { 2700 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2701 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2702 2703 /* update rhs: bb1 = bb - B*x */ 2704 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2705 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2706 2707 /* local sweep */ 2708 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2709 } 2710 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2711 if (flag & SOR_ZERO_INITIAL_GUESS) { 2712 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2713 its--; 2714 } 2715 while (its--) { 2716 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2717 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2718 2719 /* update rhs: bb1 = bb - B*x */ 2720 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2721 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2722 2723 /* local sweep */ 2724 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2725 } 2726 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2727 2728 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2729 PetscFunctionReturn(0); 2730 } 2731 2732 extern PetscErrorCode MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2733 2734 #undef __FUNCT__ 2735 #define __FUNCT__ "MatGetColumnNorms_MPIBAIJ" 2736 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms) 2737 { 2738 PetscErrorCode ierr; 2739 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)A->data; 2740 PetscInt N,i,*garray = aij->garray; 2741 PetscInt ib,jb,bs = A->rmap->bs; 2742 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) aij->A->data; 2743 MatScalar *a_val = a_aij->a; 2744 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ*) aij->B->data; 2745 MatScalar *b_val = b_aij->a; 2746 PetscReal *work; 2747 2748 PetscFunctionBegin; 2749 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 2750 ierr = PetscMalloc(N*sizeof(PetscReal),&work);CHKERRQ(ierr); 2751 ierr = PetscMemzero(work,N*sizeof(PetscReal));CHKERRQ(ierr); 2752 if (type == NORM_2) { 2753 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2754 for (jb=0; jb<bs; jb++) { 2755 for (ib=0; ib<bs; ib++) { 2756 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2757 a_val++; 2758 } 2759 } 2760 } 2761 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2762 for (jb=0; jb<bs; jb++) { 2763 for (ib=0; ib<bs; ib++) { 2764 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2765 b_val++; 2766 } 2767 } 2768 } 2769 } else if (type == NORM_1) { 2770 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2771 for (jb=0; jb<bs; jb++) { 2772 for (ib=0; ib<bs; ib++) { 2773 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2774 a_val++; 2775 } 2776 } 2777 } 2778 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2779 for (jb=0; jb<bs; jb++) { 2780 for (ib=0; ib<bs; ib++) { 2781 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2782 b_val++; 2783 } 2784 } 2785 } 2786 } else if (type == NORM_INFINITY) { 2787 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2788 for (jb=0; jb<bs; jb++) { 2789 for (ib=0; ib<bs; ib++) { 
2790 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2791 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2792 a_val++; 2793 } 2794 } 2795 } 2796 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2797 for (jb=0; jb<bs; jb++) { 2798 for (ib=0; ib<bs; ib++) { 2799 int col = garray[b_aij->j[i]] * bs + jb; 2800 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2801 b_val++; 2802 } 2803 } 2804 } 2805 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 2806 if (type == NORM_INFINITY) { 2807 ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2808 } else { 2809 ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2810 } 2811 ierr = PetscFree(work);CHKERRQ(ierr); 2812 if (type == NORM_2) { 2813 for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]); 2814 } 2815 PetscFunctionReturn(0); 2816 } 2817 2818 #undef __FUNCT__ 2819 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ" 2820 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2821 { 2822 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2823 PetscErrorCode ierr; 2824 2825 PetscFunctionBegin; 2826 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2827 PetscFunctionReturn(0); 2828 } 2829 2830 2831 /* -------------------------------------------------------------------*/ 2832 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2833 MatGetRow_MPIBAIJ, 2834 MatRestoreRow_MPIBAIJ, 2835 MatMult_MPIBAIJ, 2836 /* 4*/ MatMultAdd_MPIBAIJ, 2837 MatMultTranspose_MPIBAIJ, 2838 MatMultTransposeAdd_MPIBAIJ, 2839 0, 2840 0, 2841 0, 2842 /*10*/ 0, 2843 0, 2844 0, 2845 MatSOR_MPIBAIJ, 2846 MatTranspose_MPIBAIJ, 2847 /*15*/ MatGetInfo_MPIBAIJ, 2848 MatEqual_MPIBAIJ, 2849 MatGetDiagonal_MPIBAIJ, 2850 MatDiagonalScale_MPIBAIJ, 2851 MatNorm_MPIBAIJ, 2852 /*20*/ MatAssemblyBegin_MPIBAIJ, 2853 MatAssemblyEnd_MPIBAIJ, 2854 MatSetOption_MPIBAIJ, 2855 MatZeroEntries_MPIBAIJ, 2856 /*24*/ MatZeroRows_MPIBAIJ, 2857 0, 2858 0, 2859 0, 2860 0, 2861 /*29*/ MatSetUp_MPIBAIJ, 2862 0, 2863 0, 2864 0, 2865 0, 2866 /*34*/ MatDuplicate_MPIBAIJ, 2867 0, 2868 0, 2869 0, 2870 0, 2871 /*39*/ MatAXPY_MPIBAIJ, 2872 MatGetSubMatrices_MPIBAIJ, 2873 MatIncreaseOverlap_MPIBAIJ, 2874 MatGetValues_MPIBAIJ, 2875 MatCopy_MPIBAIJ, 2876 /*44*/ 0, 2877 MatScale_MPIBAIJ, 2878 0, 2879 0, 2880 0, 2881 /*49*/ 0, 2882 0, 2883 0, 2884 0, 2885 0, 2886 /*54*/ MatFDColoringCreate_MPIBAIJ, 2887 0, 2888 MatSetUnfactored_MPIBAIJ, 2889 MatPermute_MPIBAIJ, 2890 MatSetValuesBlocked_MPIBAIJ, 2891 /*59*/ MatGetSubMatrix_MPIBAIJ, 2892 MatDestroy_MPIBAIJ, 2893 MatView_MPIBAIJ, 2894 0, 2895 0, 2896 /*64*/ 0, 2897 0, 2898 0, 2899 0, 2900 0, 2901 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2902 0, 2903 0, 2904 0, 2905 0, 2906 /*74*/ 0, 2907 MatFDColoringApply_BAIJ, 2908 0, 2909 0, 2910 0, 2911 /*79*/ 0, 2912 0, 2913 0, 2914 0, 2915 MatLoad_MPIBAIJ, 2916 /*84*/ 0, 2917 0, 2918 0, 2919 0, 2920 0, 2921 /*89*/ 0, 2922 0, 2923 0, 2924 0, 2925 0, 2926 /*94*/ 0, 2927 0, 2928 0, 2929 0, 2930 0, 2931 /*99*/ 0, 2932 0, 2933 0, 2934 0, 2935 0, 2936 /*104*/0, 2937 MatRealPart_MPIBAIJ, 2938 MatImaginaryPart_MPIBAIJ, 2939 0, 2940 0, 2941 /*109*/0, 2942 0, 2943 0, 2944 0, 2945 0, 2946 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2947 0, 2948 MatGetGhosts_MPIBAIJ, 2949 0, 2950 0, 2951 /*119*/0, 2952 0, 2953 0, 2954 0, 2955 MatGetMultiProcBlock_MPIBAIJ, 2956 /*124*/0, 2957 MatGetColumnNorms_MPIBAIJ, 2958 MatInvertBlockDiagonal_MPIBAIJ, 2959 0, 2960 0, 2961 
/*129*/ 0, 2962 0, 2963 0, 2964 0, 2965 0, 2966 /*134*/ 0, 2967 0, 2968 0, 2969 0, 2970 0, 2971 /*139*/ 0, 2972 0, 2973 0 2974 }; 2975 2976 #undef __FUNCT__ 2977 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2978 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2979 { 2980 PetscFunctionBegin; 2981 *a = ((Mat_MPIBAIJ*)A->data)->A; 2982 PetscFunctionReturn(0); 2983 } 2984 2985 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2986 2987 #undef __FUNCT__ 2988 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2989 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2990 { 2991 PetscInt m,rstart,cstart,cend; 2992 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2993 const PetscInt *JJ =0; 2994 PetscScalar *values=0; 2995 PetscBool roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented; 2996 PetscErrorCode ierr; 2997 2998 PetscFunctionBegin; 2999 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3000 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3001 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3002 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3003 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3004 m = B->rmap->n/bs; 3005 rstart = B->rmap->rstart/bs; 3006 cstart = B->cmap->rstart/bs; 3007 cend = B->cmap->rend/bs; 3008 3009 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 3010 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3011 for (i=0; i<m; i++) { 3012 nz = ii[i+1] - ii[i]; 3013 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 3014 nz_max = PetscMax(nz_max,nz); 3015 JJ = jj + ii[i]; 3016 for (j=0; j<nz; j++) { 3017 if (*JJ >= cstart) break; 3018 JJ++; 3019 } 3020 d = 0; 3021 for (; j<nz; j++) { 3022 if (*JJ++ >= cend) break; 3023 d++; 3024 } 3025 d_nnz[i] = d; 3026 o_nnz[i] = nz - d; 3027 } 3028 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3029 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3030 3031 values = (PetscScalar*)V; 3032 if (!values) { 3033 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3034 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3035 } 3036 for (i=0; i<m; i++) { 3037 PetscInt row = i + rstart; 3038 PetscInt ncols = ii[i+1] - ii[i]; 3039 const PetscInt *icols = jj + ii[i]; 3040 if (!roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 3041 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3042 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3043 } else { /* block ordering does not match so we can only insert one block at a time. */ 3044 PetscInt j; 3045 for (j=0; j<ncols; j++) { 3046 const PetscScalar *svals = values + (V ? 
(bs*bs*(ii[i]+j)) : 0); 3047 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 3048 } 3049 } 3050 } 3051 3052 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3053 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3054 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3055 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3056 PetscFunctionReturn(0); 3057 } 3058 3059 #undef __FUNCT__ 3060 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 3061 /*@C 3062 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 3063 (the default parallel PETSc format). 3064 3065 Collective on MPI_Comm 3066 3067 Input Parameters: 3068 + A - the matrix 3069 . bs - the block size 3070 . i - the indices into j for the start of each local row (starts with zero) 3071 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3072 - v - optional values in the matrix 3073 3074 Level: developer 3075 3076 Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3077 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3078 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3079 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3080 block column and the second index is over columns within a block. 3081 3082 .keywords: matrix, aij, compressed row, sparse, parallel 3083 3084 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ 3085 @*/ 3086 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3087 { 3088 PetscErrorCode ierr; 3089 3090 PetscFunctionBegin; 3091 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3092 PetscValidType(B,1); 3093 PetscValidLogicalCollectiveInt(B,bs,2); 3094 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3095 PetscFunctionReturn(0); 3096 } 3097 3098 #undef __FUNCT__ 3099 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 3100 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 3101 { 3102 Mat_MPIBAIJ *b; 3103 PetscErrorCode ierr; 3104 PetscInt i; 3105 3106 PetscFunctionBegin; 3107 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3108 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3109 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3110 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3111 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3112 3113 if (d_nnz) { 3114 for (i=0; i<B->rmap->n/bs; i++) { 3115 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 3116 } 3117 } 3118 if (o_nnz) { 3119 for (i=0; i<B->rmap->n/bs; i++) { 3120 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 3121 } 3122 } 3123 3124 b = (Mat_MPIBAIJ*)B->data; 3125 b->bs2 = bs*bs; 3126 b->mbs = B->rmap->n/bs; 3127 b->nbs = B->cmap->n/bs; 3128 b->Mbs = B->rmap->N/bs; 3129 b->Nbs 
= B->cmap->N/bs; 3130 3131 for (i=0; i<=b->size; i++) { 3132 b->rangebs[i] = B->rmap->range[i]/bs; 3133 } 3134 b->rstartbs = B->rmap->rstart/bs; 3135 b->rendbs = B->rmap->rend/bs; 3136 b->cstartbs = B->cmap->rstart/bs; 3137 b->cendbs = B->cmap->rend/bs; 3138 3139 if (!B->preallocated) { 3140 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3141 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3142 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 3143 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 3144 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3145 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3146 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 3147 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 3148 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 3149 } 3150 3151 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3152 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 3153 B->preallocated = PETSC_TRUE; 3154 PetscFunctionReturn(0); 3155 } 3156 3157 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 3158 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 3159 3160 #undef __FUNCT__ 3161 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 3162 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 3163 { 3164 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 3165 PetscErrorCode ierr; 3166 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 3167 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 3168 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 3169 3170 PetscFunctionBegin; 3171 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 3172 ii[0] = 0; 3173 for (i=0; i<M; i++) { 3174 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 3175 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 3176 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 3177 /* remove one from count if matrix has diagonal */ 3178 for (j=id[i]; j<id[i+1]; j++) { 3179 if (jd[j] == i) {ii[i+1]--;break;} 3180 } 3181 } 3182 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3183 cnt = 0; 3184 for (i=0; i<M; i++) { 3185 for (j=io[i]; j<io[i+1]; j++) { 3186 if (garray[jo[j]] > rstart) break; 3187 jj[cnt++] = garray[jo[j]]; 3188 } 3189 for (k=id[i]; k<id[i+1]; k++) { 3190 if (jd[k] != i) { 3191 jj[cnt++] = rstart + jd[k]; 3192 } 3193 } 3194 for (; j<io[i+1]; j++) { 3195 jj[cnt++] = garray[jo[j]]; 3196 } 3197 } 3198 ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr); 3199 PetscFunctionReturn(0); 3200 } 3201 3202 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3203 3204 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 3205 3206 #undef __FUNCT__ 3207 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ" 3208 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 3209 { 3210 PetscErrorCode ierr; 3211 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3212 Mat B; 3213 Mat_MPIAIJ *b; 3214 3215 PetscFunctionBegin; 3216 if (!A->assembled)
SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled"); 3217 3218 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3219 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3220 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 3221 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 3222 ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr); 3223 b = (Mat_MPIAIJ*) B->data; 3224 3225 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 3226 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 3227 ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 3228 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 3229 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 3230 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3231 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3232 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3233 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3234 if (reuse == MAT_REUSE_MATRIX) { 3235 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3236 } else { 3237 *newmat = B; 3238 } 3239 PetscFunctionReturn(0); 3240 } 3241 3242 #if defined(PETSC_HAVE_MUMPS) 3243 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3244 #endif 3245 3246 /*MC 3247 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 3248 3249 Options Database Keys: 3250 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 3251 . -mat_block_size <bs> - set the blocksize used to store the matrix 3252 - -mat_use_hash_table <fact> 3253 3254 Level: beginner 3255 3256 .seealso: MatCreateMPIBAIJ 3257 M*/ 3258 3259 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 3260 3261 #undef __FUNCT__ 3262 #define __FUNCT__ "MatCreate_MPIBAIJ" 3263 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 3264 { 3265 Mat_MPIBAIJ *b; 3266 PetscErrorCode ierr; 3267 PetscBool flg; 3268 3269 PetscFunctionBegin; 3270 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 3271 B->data = (void*)b; 3272 3273 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3274 B->assembled = PETSC_FALSE; 3275 3276 B->insertmode = NOT_SET_VALUES; 3277 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 3278 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 3279 3280 /* build local table of row and column ownerships */ 3281 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 3282 3283 /* build cache for off array entries formed */ 3284 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 3285 3286 b->donotstash = PETSC_FALSE; 3287 b->colmap = NULL; 3288 b->garray = NULL; 3289 b->roworiented = PETSC_TRUE; 3290 3291 /* stuff used in block assembly */ 3292 b->barray = 0; 3293 3294 /* stuff used for matrix vector multiply */ 3295 b->lvec = 0; 3296 b->Mvctx = 0; 3297 3298 /* stuff for MatGetRow() */ 3299 b->rowindices = 0; 3300 b->rowvalues = 0; 3301 b->getrowactive = PETSC_FALSE; 3302 3303 /* hash table stuff */ 3304 b->ht = 0; 3305 b->hd = 0; 3306 b->ht_size = 0; 3307 b->ht_flag = PETSC_FALSE; 3308 b->ht_fact = 0; 3309 b->ht_total_ct = 0; 3310 b->ht_insert_ct = 0; 3311 3312 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 3313 b->ijonly = PETSC_FALSE; 3314 3315 ierr = 
PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3316 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr); 3317 if (flg) { 3318 PetscReal fact = 1.39; 3319 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3320 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 3321 if (fact <= 1.0) fact = 1.39; 3322 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3323 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3324 } 3325 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3326 3327 #if defined(PETSC_HAVE_MUMPS) 3328 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_baij_mumps);CHKERRQ(ierr); 3329 #endif 3330 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3331 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 3332 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 3333 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3334 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3335 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3336 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3337 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3338 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3339 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3340 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr); 3341 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3342 PetscFunctionReturn(0); 3343 } 3344 3345 /*MC 3346 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3347 3348 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3349 and MATMPIBAIJ otherwise. 3350 3351 Options Database Keys: 3352 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3353 3354 Level: beginner 3355 3356 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3357 M*/ 3358 3359 #undef __FUNCT__ 3360 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3361 /*@C 3362 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3363 (block compressed row). For good matrix assembly performance 3364 the user should preallocate the matrix storage by setting the parameters 3365 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3366 performance can be increased by more than a factor of 50. 3367 3368 Collective on Mat 3369 3370 Input Parameters: 3371 + A - the matrix 3372 . bs - size of block 3373 . 
d_nz - number of block nonzeros per block row in diagonal portion of local 3374 submatrix (same for all local rows) 3375 . d_nnz - array containing the number of block nonzeros in the various block rows 3376 of the diagonal portion of the local submatrix (possibly different for each block 3377 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and 3378 set it even if it is zero. 3379 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3380 submatrix (same for all local rows). 3381 - o_nnz - array containing the number of block nonzeros in the various block rows of the 3382 off-diagonal portion of the local submatrix (possibly different for 3383 each block row) or NULL. 3384 3385 If the *_nnz parameter is given then the *_nz parameter is ignored 3386 3387 Options Database Keys: 3388 + -mat_block_size - size of the blocks to use 3389 - -mat_use_hash_table <fact> 3390 3391 Notes: 3392 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3393 then it must be used on all processors that share the object for that argument. 3394 3395 Storage Information: 3396 For a square global matrix we define each processor's diagonal portion 3397 to be its local rows and the corresponding columns (a square submatrix); 3398 each processor's off-diagonal portion encompasses the remainder of the 3399 local matrix (a rectangular submatrix). 3400 3401 The user can specify preallocated storage for the diagonal part of 3402 the local submatrix with either d_nz or d_nnz (not both). Set 3403 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3404 memory allocation. Likewise, specify preallocated storage for the 3405 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3406 3407 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3408 the figure below we depict these three local rows and all columns (0-11). 3409 3410 .vb 3411 0 1 2 3 4 5 6 7 8 9 10 11 3412 -------------------------- 3413 row 3 |o o o d d d o o o o o o 3414 row 4 |o o o d d d o o o o o o 3415 row 5 |o o o d d d o o o o o o 3416 -------------------------- 3417 .ve 3418 3419 Thus, any entries in the d locations are stored in the d (diagonal) 3420 submatrix, and any entries in the o locations are stored in the 3421 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3422 stored simply in the MATSEQBAIJ format for compressed row storage. 3423 3424 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3425 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3426 In general, for PDE problems in which most nonzeros are near the diagonal, 3427 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3428 or you will get TERRIBLE performance; see the users' manual chapter on 3429 matrices. 3430 3431 You can call MatGetInfo() to get information on how effective the preallocation was; 3432 for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 3433 You can also run with the option -info and look for messages with the string 3434 malloc in them to see if additional memory allocation was needed.
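   Example usage (a minimal sketch; the block size, local size, and per-block-row counts below are
   illustrative only):
.vb
      Mat      A;
      PetscInt bs = 2, mlocal = 4;
      PetscInt d_nnz[2] = {2,1}, o_nnz[2] = {1,0};

      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetSizes(A,mlocal,mlocal,PETSC_DETERMINE,PETSC_DETERMINE);
      MatSetType(A,MATMPIBAIJ);
      MatMPIBAIJSetPreallocation(A,bs,0,d_nnz,0,o_nnz);
.ve
   Since mlocal/bs = 2 block rows are owned locally, d_nnz and o_nnz each have 2 entries here;
   error checking is omitted for brevity.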
3435 3436 Level: intermediate 3437 3438 .keywords: matrix, block, aij, compressed row, sparse, parallel 3439 3440 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership() 3441 @*/ 3442 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3443 { 3444 PetscErrorCode ierr; 3445 3446 PetscFunctionBegin; 3447 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3448 PetscValidType(B,1); 3449 PetscValidLogicalCollectiveInt(B,bs,2); 3450 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3451 PetscFunctionReturn(0); 3452 } 3453 3454 #undef __FUNCT__ 3455 #define __FUNCT__ "MatCreateBAIJ" 3456 /*@C 3457 MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format 3458 (block compressed row). For good matrix assembly performance 3459 the user should preallocate the matrix storage by setting the parameters 3460 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3461 performance can be increased by more than a factor of 50. 3462 3463 Collective on MPI_Comm 3464 3465 Input Parameters: 3466 + comm - MPI communicator 3467 . bs - size of block 3468 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3469 This value should be the same as the local size used in creating the 3470 y vector for the matrix-vector product y = Ax. 3471 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 3472 This value should be the same as the local size used in creating the 3473 x vector for the matrix-vector product y = Ax. 3474 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3475 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3476 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3477 submatrix (same for all local rows) 3478 . d_nnz - array containing the number of nonzero blocks in the various block rows 3479 of the diagonal portion of the local submatrix (possibly different for each block 3480 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry 3481 and set it even if it is zero. 3482 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3483 submatrix (same for all local rows). 3484 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3485 off-diagonal portion of the local submatrix (possibly different for 3486 each block row) or NULL. 3487 3488 Output Parameter: 3489 . A - the matrix 3490 3491 Options Database Keys: 3492 + -mat_block_size - size of the blocks to use 3493 - -mat_use_hash_table <fact> 3494 3495 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3496 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3497 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3498 3499 Notes: 3500 If the *_nnz parameter is given then the *_nz parameter is ignored 3501 3502 A nonzero block is any block that has 1 or more nonzeros in it 3503 3504 The user MUST specify either the local or global matrix dimensions 3505 (possibly both). 3506 3507 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3508 then it must be used on all processors that share the object for that argument.
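   Example usage (a sketch; the sizes are illustrative only):
.vb
      Mat A;
      MatCreateBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,100,100,5,NULL,2,NULL,&A);
.ve
   which creates a 100 by 100 matrix stored in 2 by 2 blocks, distributed across the processes of
   PETSC_COMM_WORLD, with room preallocated for 5 diagonal and 2 off-diagonal block nonzeros per block row.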
3509 3510 Storage Information: 3511 For a square global matrix we define each processor's diagonal portion 3512 to be its local rows and the corresponding columns (a square submatrix); 3513 each processor's off-diagonal portion encompasses the remainder of the 3514 local matrix (a rectangular submatrix). 3515 3516 The user can specify preallocated storage for the diagonal part of 3517 the local submatrix with either d_nz or d_nnz (not both). Set 3518 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3519 memory allocation. Likewise, specify preallocated storage for the 3520 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3521 3522 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3523 the figure below we depict these three local rows and all columns (0-11). 3524 3525 .vb 3526 0 1 2 3 4 5 6 7 8 9 10 11 3527 -------------------------- 3528 row 3 |o o o d d d o o o o o o 3529 row 4 |o o o d d d o o o o o o 3530 row 5 |o o o d d d o o o o o o 3531 -------------------------- 3532 .ve 3533 3534 Thus, any entries in the d locations are stored in the d (diagonal) 3535 submatrix, and any entries in the o locations are stored in the 3536 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3537 stored simply in the MATSEQBAIJ format for compressed row storage. 3538 3539 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3540 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3541 In general, for PDE problems in which most nonzeros are near the diagonal, 3542 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3543 or you will get TERRIBLE performance; see the users' manual chapter on 3544 matrices. 3545 3546 Level: intermediate 3547 3548 .keywords: matrix, block, aij, compressed row, sparse, parallel 3549 3550 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3551 @*/ 3552 PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3553 { 3554 PetscErrorCode ierr; 3555 PetscMPIInt size; 3556 3557 PetscFunctionBegin; 3558 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3559 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3560 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3561 if (size > 1) { 3562 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3563 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3564 } else { 3565 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3566 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3567 } 3568 PetscFunctionReturn(0); 3569 } 3570 3571 #undef __FUNCT__ 3572 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3573 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3574 { 3575 Mat mat; 3576 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3577 PetscErrorCode ierr; 3578 PetscInt len=0; 3579 3580 PetscFunctionBegin; 3581 *newmat = 0; 3582 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3583 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3584 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3585 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3586 3587 mat->factortype = matin->factortype; 3588 mat->preallocated = 
PETSC_TRUE; 3589 mat->assembled = PETSC_TRUE; 3590 mat->insertmode = NOT_SET_VALUES; 3591 3592 a = (Mat_MPIBAIJ*)mat->data; 3593 mat->rmap->bs = matin->rmap->bs; 3594 a->bs2 = oldmat->bs2; 3595 a->mbs = oldmat->mbs; 3596 a->nbs = oldmat->nbs; 3597 a->Mbs = oldmat->Mbs; 3598 a->Nbs = oldmat->Nbs; 3599 3600 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3601 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3602 3603 a->size = oldmat->size; 3604 a->rank = oldmat->rank; 3605 a->donotstash = oldmat->donotstash; 3606 a->roworiented = oldmat->roworiented; 3607 a->rowindices = 0; 3608 a->rowvalues = 0; 3609 a->getrowactive = PETSC_FALSE; 3610 a->barray = 0; 3611 a->rstartbs = oldmat->rstartbs; 3612 a->rendbs = oldmat->rendbs; 3613 a->cstartbs = oldmat->cstartbs; 3614 a->cendbs = oldmat->cendbs; 3615 3616 /* hash table stuff */ 3617 a->ht = 0; 3618 a->hd = 0; 3619 a->ht_size = 0; 3620 a->ht_flag = oldmat->ht_flag; 3621 a->ht_fact = oldmat->ht_fact; 3622 a->ht_total_ct = 0; 3623 a->ht_insert_ct = 0; 3624 3625 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3626 if (oldmat->colmap) { 3627 #if defined(PETSC_USE_CTABLE) 3628 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3629 #else 3630 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3631 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3632 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3633 #endif 3634 } else a->colmap = 0; 3635 3636 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3637 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3638 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3639 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3640 } else a->garray = 0; 3641 3642 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3643 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3644 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3645 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3646 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3647 3648 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3649 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3650 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3651 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3652 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3653 *newmat = mat; 3654 PetscFunctionReturn(0); 3655 } 3656 3657 #undef __FUNCT__ 3658 #define __FUNCT__ "MatLoad_MPIBAIJ" 3659 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3660 { 3661 PetscErrorCode ierr; 3662 int fd; 3663 PetscInt i,nz,j,rstart,rend; 3664 PetscScalar *vals,*buf; 3665 MPI_Comm comm; 3666 MPI_Status status; 3667 PetscMPIInt rank,size,maxnz; 3668 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3669 PetscInt *locrowlens = NULL,*procsnz = NULL,*browners = NULL; 3670 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3671 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3672 PetscInt *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount; 3673 PetscInt 
dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 3674 3675 PetscFunctionBegin; 3676 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3677 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3678 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3679 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3680 3681 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3682 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3683 if (!rank) { 3684 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3685 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3686 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3687 } 3688 3689 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 3690 3691 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3692 M = header[1]; N = header[2]; 3693 3694 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3695 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 3696 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 3697 3698 /* If global sizes are set, check if they are consistent with that given in the file */ 3699 if (sizesset) { 3700 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 3701 } 3702 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3703 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3704 3705 if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices"); 3706 3707 /* 3708 This code adds extra rows to make sure the number of rows is 3709 divisible by the blocksize 3710 */ 3711 Mbs = M/bs; 3712 extra_rows = bs - M + bs*Mbs; 3713 if (extra_rows == bs) extra_rows = 0; 3714 else Mbs++; 3715 if (extra_rows && !rank) { 3716 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3717 } 3718 3719 /* determine ownership of all rows */ 3720 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3721 mbs = Mbs/size + ((Mbs % size) > rank); 3722 m = mbs*bs; 3723 } else { /* User set */ 3724 m = newmat->rmap->n; 3725 mbs = m/bs; 3726 } 3727 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 3728 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3729 3730 /* process 0 needs enough room for process with most rows */ 3731 if (!rank) { 3732 mmax = rowners[1]; 3733 for (i=2; i<=size; i++) { 3734 mmax = PetscMax(mmax,rowners[i]); 3735 } 3736 mmax*=bs; 3737 } else mmax = -1; /* unused, but compiler warns anyway */ 3738 3739 rowners[0] = 0; 3740 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3741 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3742 rstart = rowners[rank]; 3743 rend = rowners[rank+1]; 3744 3745 /* distribute row lengths to all processors */ 3746 ierr = PetscMalloc(m*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 3747 if (!rank) { 3748 mend = m; 3749 if (size == 1) mend = mend - extra_rows; 3750 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3751 for (j=mend; j<m; j++) locrowlens[j] = 1; 3752 ierr = 
PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3753 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3754 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3755 for (j=0; j<m; j++) { 3756 procsnz[0] += locrowlens[j]; 3757 } 3758 for (i=1; i<size; i++) { 3759 mend = browners[i+1] - browners[i]; 3760 if (i == size-1) mend = mend - extra_rows; 3761 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3762 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3763 /* calculate the number of nonzeros on each processor */ 3764 for (j=0; j<browners[i+1]-browners[i]; j++) { 3765 procsnz[i] += rowlengths[j]; 3766 } 3767 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3768 } 3769 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3770 } else { 3771 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3772 } 3773 3774 if (!rank) { 3775 /* determine max buffer needed and allocate it */ 3776 maxnz = procsnz[0]; 3777 for (i=1; i<size; i++) { 3778 maxnz = PetscMax(maxnz,procsnz[i]); 3779 } 3780 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3781 3782 /* read in my part of the matrix column indices */ 3783 nz = procsnz[0]; 3784 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3785 mycols = ibuf; 3786 if (size == 1) nz -= extra_rows; 3787 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3788 if (size == 1) { 3789 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 3790 } 3791 3792 /* read in the other processors' column indices (except the last) and ship them off */ 3793 for (i=1; i<size-1; i++) { 3794 nz = procsnz[i]; 3795 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3796 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3797 } 3798 /* read in the stuff for the last proc */ 3799 if (size != 1) { 3800 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3801 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3802 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3803 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3804 } 3805 ierr = PetscFree(cols);CHKERRQ(ierr); 3806 } else { 3807 /* determine buffer space needed for message */ 3808 nz = 0; 3809 for (i=0; i<m; i++) { 3810 nz += locrowlens[i]; 3811 } 3812 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3813 mycols = ibuf; 3814 /* receive message of column indices */ 3815 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3816 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3817 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3818 } 3819 3820 /* loop over local rows, determining number of off diagonal entries */ 3821 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 3822 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 3823 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3824 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3825 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3826 rowcount = 0; nzcount = 0; 3827 for (i=0; i<mbs; i++) { 3828 dcount = 0; 3829 odcount = 0; 3830 for (j=0; j<bs; j++) { 3831 kmax = locrowlens[rowcount]; 3832 for (k=0; k<kmax; k++) { 3833 tmp = mycols[nzcount++]/bs; 3834 if (!mask[tmp]) { 3835 mask[tmp] = 1; 3836 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3837 else masked1[dcount++] = tmp; 3838 } 3839 } 3840 rowcount++; 3841 } 3842 3843 dlens[i] = dcount; 3844 odlens[i] = odcount; 3845 3846 /* zero out the mask elements we set */ 3847 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3848 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3849 } 3850 3851 3852 if (!sizesset) { 3853 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3854 } 3855 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3856 3857 if (!rank) { 3858 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3859 /* read in my part of the matrix numerical values */ 3860 nz = procsnz[0]; 3861 vals = buf; 3862 mycols = ibuf; 3863 if (size == 1) nz -= extra_rows; 3864 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3865 if (size == 1) { 3866 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 3867 } 3868 3869 /* insert into matrix */ 3870 jj = rstart*bs; 3871 for (i=0; i<m; i++) { 3872 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3873 mycols += locrowlens[i]; 3874 vals += locrowlens[i]; 3875 jj++; 3876 } 3877 /* read in other processors (except the last one) and ship out */ 3878 for (i=1; i<size-1; i++) { 3879 nz = procsnz[i]; 3880 vals = buf; 3881 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3882 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3883 } 3884 /* the last proc */ 3885 if (size != 1) { 3886 nz = procsnz[i] - extra_rows; 3887 vals = buf; 3888 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3889 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3890 ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3891 } 3892 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3893 } else { 3894 /* receive numeric values */ 3895 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3896 3897 /* receive message of values*/ 3898 vals = buf; 3899 mycols = ibuf; 3900 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3901 3902 /* insert into matrix */ 3903 jj = rstart*bs; 3904 for (i=0; i<m; i++) { 3905 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3906 mycols += locrowlens[i]; 3907 vals += locrowlens[i]; 3908 jj++; 3909 } 3910 } 3911 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3912 ierr = PetscFree(buf);CHKERRQ(ierr); 3913 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3914 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3915 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3916 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3917 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3918 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3919 PetscFunctionReturn(0); 3920 } 3921 3922 #undef __FUNCT__ 3923 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3924 /*@ 3925 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3926 3927 Input Parameters: 3928 . mat - the matrix 3929 . 
fact - the hash table sizing factor 3930 3931 Not Collective, each process can use a different factor 3932 3933 Level: advanced 3934 3935 Notes: 3936 This can also be set by the command line option: -mat_use_hash_table <fact> 3937 3938 .keywords: matrix, hashtable, factor, HT 3939 3940 .seealso: MatSetOption() 3941 @*/ 3942 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3943 { 3944 PetscErrorCode ierr; 3945 3946 PetscFunctionBegin; 3947 ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr); 3948 PetscFunctionReturn(0); 3949 } 3950 3951 #undef __FUNCT__ 3952 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3953 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3954 { 3955 Mat_MPIBAIJ *baij; 3956 3957 PetscFunctionBegin; 3958 baij = (Mat_MPIBAIJ*)mat->data; 3959 baij->ht_fact = fact; 3960 PetscFunctionReturn(0); 3961 } 3962 3963 #undef __FUNCT__ 3964 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3965 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3966 { 3967 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3968 3969 PetscFunctionBegin; 3970 *Ad = a->A; 3971 *Ao = a->B; 3972 *colmap = a->garray; 3973 PetscFunctionReturn(0); 3974 } 3975 3976 /* 3977 Special version for direct calls from Fortran (to eliminate two function call overheads) 3978 */ 3979 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3980 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3981 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3982 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3983 #endif 3984 3985 #undef __FUNCT__ 3986 #define __FUNCT__ "matmpibaijsetvaluesblocked_" 3987 /*@C 3988 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3989 3990 Collective on Mat 3991 3992 Input Parameters: 3993 + mat - the matrix 3994 . min - number of input rows 3995 . im - input rows 3996 . nin - number of input columns 3997 . in - input columns 3998 . v - numerical values input 3999 - addvin - INSERT_VALUES or ADD_VALUES 4000 4001 Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ(), which is terrible code un-reuse.
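   For reference, it is equivalent to calling the generic interface with the dereferenced arguments:
.vb
      ierr = MatSetValuesBlocked(mat,m,im,n,in,v,addv);CHKERRQ(ierr);
.ve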
4002 4003 Level: advanced 4004 4005 .seealso: MatSetValuesBlocked() 4006 @*/ 4007 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 4008 { 4009 /* convert input arguments to C version */ 4010 Mat mat = *matin; 4011 PetscInt m = *min, n = *nin; 4012 InsertMode addv = *addvin; 4013 4014 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 4015 const MatScalar *value; 4016 MatScalar *barray = baij->barray; 4017 PetscBool roworiented = baij->roworiented; 4018 PetscErrorCode ierr; 4019 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 4020 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 4021 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 4022 4023 PetscFunctionBegin; 4024 /* tasks normally handled by MatSetValuesBlocked() */ 4025 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 4026 #if defined(PETSC_USE_DEBUG) 4027 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 4028 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 4029 #endif 4030 if (mat->assembled) { 4031 mat->was_assembled = PETSC_TRUE; 4032 mat->assembled = PETSC_FALSE; 4033 } 4034 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 4035 4036 4037 if (!barray) { 4038 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 4039 baij->barray = barray; 4040 } 4041 4042 if (roworiented) stepval = (n-1)*bs; 4043 else stepval = (m-1)*bs; 4044 4045 for (i=0; i<m; i++) { 4046 if (im[i] < 0) continue; 4047 #if defined(PETSC_USE_DEBUG) 4048 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 4049 #endif 4050 if (im[i] >= rstart && im[i] < rend) { 4051 row = im[i] - rstart; 4052 for (j=0; j<n; j++) { 4053 /* If NumCol = 1 then a copy is not required */ 4054 if ((roworiented) && (n == 1)) { 4055 barray = (MatScalar*)v + i*bs2; 4056 } else if ((!roworiented) && (m == 1)) { 4057 barray = (MatScalar*)v + j*bs2; 4058 } else { /* Here a copy is required */ 4059 if (roworiented) { 4060 value = v + i*(stepval+bs)*bs + j*bs; 4061 } else { 4062 value = v + j*(stepval+bs)*bs + i*bs; 4063 } 4064 for (ii=0; ii<bs; ii++,value+=stepval) { 4065 for (jj=0; jj<bs; jj++) { 4066 *barray++ = *value++; 4067 } 4068 } 4069 barray -=bs2; 4070 } 4071 4072 if (in[j] >= cstart && in[j] < cend) { 4073 col = in[j] - cstart; 4074 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 4075 } else if (in[j] < 0) continue; 4076 #if defined(PETSC_USE_DEBUG) 4077 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 4078 #endif 4079 else { 4080 if (mat->was_assembled) { 4081 if (!baij->colmap) { 4082 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 4083 } 4084 4085 #if defined(PETSC_USE_DEBUG) 4086 #if defined(PETSC_USE_CTABLE) 4087 { PetscInt data; 4088 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 4089 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4090 } 4091 #else 4092 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4093 #endif 4094 #endif 4095 #if defined(PETSC_USE_CTABLE) 4096 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4097 col = (col - 1)/bs; 4098 #else 4099 col = (baij->colmap[in[j]] 
- 1)/bs; 4100 #endif 4101 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 4102 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 4103 col = in[j]; 4104 } 4105 } else col = in[j]; 4106 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 4107 } 4108 } 4109 } else { 4110 if (!baij->donotstash) { 4111 if (roworiented) { 4112 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 4113 } else { 4114 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 4115 } 4116 } 4117 } 4118 } 4119 4120 /* task normally handled by MatSetValuesBlocked() */ 4121 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 4122 PetscFunctionReturn(0); 4123 } 4124 4125 #undef __FUNCT__ 4126 #define __FUNCT__ "MatCreateMPIBAIJWithArrays" 4127 /*@ 4128 MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows 4129 in standard CSR format. 4130 4131 Collective on MPI_Comm 4132 4133 Input Parameters: 4134 + comm - MPI communicator 4135 . bs - the block size, only a block size of 1 is supported 4136 . m - number of local rows (Cannot be PETSC_DECIDE) 4137 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given); this value should 4138 be the same as the local size used in creating the x vector for the matrix-vector 4139 product y = Ax. For square matrices n is almost always m. 4140 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4141 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4142 . i - row indices 4143 . j - column indices 4144 - a - matrix values 4145 4146 Output Parameter: 4147 . mat - the matrix 4148 4149 Level: intermediate 4150 4151 Notes: 4152 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4153 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4154 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4155 4156 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 4157 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 4158 block, followed by the second column of the first block, and so on. That is, the blocks are contiguous in memory 4159 with column-major ordering within blocks. 4160 4161 The i and j indices are 0-based, and the i array holds, for each local block row, the offset of that row's entries in the local j array.
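   As an illustration (a hypothetical single-process example with bs = 1), the 2 by 2 local matrix
.vb
      1 2
      0 3
.ve
   would be passed as
.vb
      PetscInt    ia[3] = {0,2,3};
      PetscInt    ja[3] = {0,1,1};
      PetscScalar va[3] = {1.0,2.0,3.0};
      MatCreateMPIBAIJWithArrays(PETSC_COMM_SELF,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,ia,ja,va,&mat);
.ve
   where ia[r] is the offset of row r in ja, and ia[2] = 3 is the total number of nonzeros.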
4162 4163 .keywords: matrix, aij, compressed row, sparse, parallel 4164 4165 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4166 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4167 @*/ 4168 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4169 { 4170 PetscErrorCode ierr; 4171 4172 PetscFunctionBegin; 4173 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4174 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4175 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4176 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4177 ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr); 4178 ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 4179 ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 4180 ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 4181 PetscFunctionReturn(0); 4182 } 4183