1be1d678aSKris Buschelman #define PETSCMAT_DLL 2a30f8f8cSSatish Balay 3a30f8f8cSSatish Balay /* 4a30f8f8cSSatish Balay Support for the parallel SBAIJ matrix vector multiply 5a30f8f8cSSatish Balay */ 67c4f633dSBarry Smith #include "../src/mat/impls/sbaij/mpi/mpisbaij.h" 77cff1425SSatish Balay 8*09573ac7SBarry Smith extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 97cff1425SSatish Balay 10a30f8f8cSSatish Balay 114a2ae208SSatish Balay #undef __FUNCT__ 124a2ae208SSatish Balay #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ" 13dfbe8321SBarry Smith PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat) 14a30f8f8cSSatish Balay { 1540781036SHong Zhang Mat_MPISBAIJ *sbaij = (Mat_MPISBAIJ*)mat->data; 1640781036SHong Zhang Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(sbaij->B->data); 176849ba73SBarry Smith PetscErrorCode ierr; 1813f74950SBarry Smith PetscInt Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ec = 0,*garray,*sgarray; 19d0f46423SBarry Smith PetscInt bs = mat->rmap->bs,*stmp,mbs=sbaij->mbs, vec_size,nt; 2040781036SHong Zhang IS from,to; 2140781036SHong Zhang Vec gvec; 2213f74950SBarry Smith PetscMPIInt rank=sbaij->rank,lsize,size=sbaij->size; 23899cda47SBarry Smith PetscInt *owners=sbaij->rangebs,*sowners,*ec_owner,k; 2440781036SHong Zhang PetscScalar *ptr; 2540781036SHong Zhang 2640781036SHong Zhang PetscFunctionBegin; 2701b2bd88SHong Zhang if (sbaij->sMvctx) { 2801b2bd88SHong Zhang /* This two lines should be in DisAssemble_MPISBAIJ(). Don't know why it causes crash there? */ 2901b2bd88SHong Zhang ierr = VecScatterDestroy(sbaij->sMvctx);CHKERRQ(ierr); 3001b2bd88SHong Zhang sbaij->sMvctx = 0; 3140781036SHong Zhang } 3240781036SHong Zhang 3340781036SHong Zhang /* For the first stab we make an array as long as the number of columns */ 3440781036SHong Zhang /* mark those columns that are in sbaij->B */ 3574ed9c26SBarry Smith ierr = PetscMalloc(Nbs*sizeof(PetscInt),&indices);CHKERRQ(ierr); 3613f74950SBarry Smith ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr); 3740781036SHong Zhang for (i=0; i<mbs; i++) { 3840781036SHong Zhang for (j=0; j<B->ilen[i]; j++) { 3940781036SHong Zhang if (!indices[aj[B->i[i] + j]]) ec++; 4040781036SHong Zhang indices[aj[B->i[i] + j] ] = 1; 4140781036SHong Zhang } 4240781036SHong Zhang } 4340781036SHong Zhang 4440781036SHong Zhang /* form arrays of columns we need */ 4574ed9c26SBarry Smith ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr); 4674ed9c26SBarry Smith ierr = PetscMalloc2(2*ec,PetscInt,&sgarray,ec,PetscInt,&ec_owner);CHKERRQ(ierr); 4740781036SHong Zhang 4840781036SHong Zhang ec = 0; 4940781036SHong Zhang for (j=0; j<size; j++){ 5040781036SHong Zhang for (i=owners[j]; i<owners[j+1]; i++){ 5140781036SHong Zhang if (indices[i]) { 5240781036SHong Zhang garray[ec] = i; 5340781036SHong Zhang ec_owner[ec] = j; 5440781036SHong Zhang ec++; 5540781036SHong Zhang } 5640781036SHong Zhang } 5740781036SHong Zhang } 5840781036SHong Zhang 5940781036SHong Zhang /* make indices now point into garray */ 60b424e231SHong Zhang for (i=0; i<ec; i++) indices[garray[i]] = i; 6140781036SHong Zhang 6240781036SHong Zhang /* compact out the extra columns in B */ 6340781036SHong Zhang for (i=0; i<mbs; i++) { 6440781036SHong Zhang for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 6540781036SHong Zhang } 6640781036SHong Zhang B->nbs = ec; 67d0f46423SBarry Smith sbaij->B->cmap->n = sbaij->B->cmap->N = ec*mat->rmap->bs; 6826283091SBarry Smith ierr = PetscLayoutSetUp((sbaij->B->cmap));CHKERRQ(ierr); 6940781036SHong Zhang ierr = PetscFree(indices);CHKERRQ(ierr); 7040781036SHong Zhang 7140781036SHong Zhang /* create local vector that is used to scatter into */ 7240781036SHong Zhang ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr); 7340781036SHong Zhang 7440781036SHong Zhang /* create two temporary index sets for building scatter-gather */ 7574ed9c26SBarry Smith ierr = PetscMalloc(2*ec*sizeof(PetscInt),&stmp);CHKERRQ(ierr); 76deff0451SBarry Smith ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 77e82e9f6bSBarry Smith for (i=0; i<ec; i++) { stmp[i] = i; } 78deff0451SBarry Smith ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr); 7940781036SHong Zhang 8093d1592dSHong Zhang /* generate the scatter context 8193d1592dSHong Zhang -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but usefule for some applications */ 82d0f46423SBarry Smith ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr); 8340781036SHong Zhang ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr); 84b5eb4454SBarry Smith ierr = VecDestroy(gvec);CHKERRQ(ierr); 8540781036SHong Zhang 8640781036SHong Zhang sbaij->garray = garray; 8752e6d16bSBarry Smith ierr = PetscLogObjectParent(mat,sbaij->Mvctx);CHKERRQ(ierr); 8852e6d16bSBarry Smith ierr = PetscLogObjectParent(mat,sbaij->lvec);CHKERRQ(ierr); 8952e6d16bSBarry Smith ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); 9052e6d16bSBarry Smith ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); 9140781036SHong Zhang 9240781036SHong Zhang ierr = ISDestroy(from);CHKERRQ(ierr); 9340781036SHong Zhang ierr = ISDestroy(to);CHKERRQ(ierr); 9440781036SHong Zhang 9540781036SHong Zhang /* create parallel vector that is used by SBAIJ matrix to scatter from/into */ 9640781036SHong Zhang lsize = (mbs + ec)*bs; 977adad957SLisandro Dalcin ierr = VecCreateMPI(((PetscObject)mat)->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr); 9840781036SHong Zhang ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr); 9940781036SHong Zhang ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr); 10040781036SHong Zhang 101d0f46423SBarry Smith sowners = sbaij->slvec0->map->range; 10240781036SHong Zhang 10340781036SHong Zhang /* x index in the IS sfrom */ 10440781036SHong Zhang for (i=0; i<ec; i++) { 10540781036SHong Zhang j = ec_owner[i]; 10640781036SHong Zhang sgarray[i] = garray[i] + (sowners[j]/bs - owners[j]); 10740781036SHong Zhang } 10840781036SHong Zhang /* b index in the IS sfrom */ 10940781036SHong Zhang k = sowners[rank]/bs + mbs; 11040781036SHong Zhang for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j; 111deff0451SBarry Smith ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,sgarray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 11240781036SHong Zhang 11340781036SHong Zhang /* x index in the IS sto */ 11440781036SHong Zhang k = sowners[rank]/bs + mbs; 115e82e9f6bSBarry Smith for (i=0; i<ec; i++) stmp[i] = (k + i); 11640781036SHong Zhang /* b index in the IS sto */ 117e82e9f6bSBarry Smith for (i=ec; i<2*ec; i++) stmp[i] = sgarray[i-ec]; 11840781036SHong Zhang 119deff0451SBarry Smith ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr); 12040781036SHong Zhang 12140781036SHong Zhang ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr); 12240781036SHong Zhang 12340781036SHong Zhang ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr); 1241ebc52fbSHong Zhang ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr); 12540781036SHong Zhang ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr); 12640781036SHong Zhang ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr); 1271ebc52fbSHong Zhang ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr); 12840781036SHong Zhang 1291ebc52fbSHong Zhang ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr); 13040781036SHong Zhang ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr); 1311ebc52fbSHong Zhang ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr); 13240781036SHong Zhang 13340781036SHong Zhang ierr = PetscFree(stmp);CHKERRQ(ierr); 1347adad957SLisandro Dalcin ierr = MPI_Barrier(((PetscObject)mat)->comm);CHKERRQ(ierr); 13540781036SHong Zhang 13652e6d16bSBarry Smith ierr = PetscLogObjectParent(mat,sbaij->sMvctx);CHKERRQ(ierr); 13752e6d16bSBarry Smith ierr = PetscLogObjectParent(mat,sbaij->slvec0);CHKERRQ(ierr); 13852e6d16bSBarry Smith ierr = PetscLogObjectParent(mat,sbaij->slvec1);CHKERRQ(ierr); 13952e6d16bSBarry Smith ierr = PetscLogObjectParent(mat,sbaij->slvec0b);CHKERRQ(ierr); 14052e6d16bSBarry Smith ierr = PetscLogObjectParent(mat,sbaij->slvec1a);CHKERRQ(ierr); 14152e6d16bSBarry Smith ierr = PetscLogObjectParent(mat,sbaij->slvec1b);CHKERRQ(ierr); 14252e6d16bSBarry Smith ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); 14352e6d16bSBarry Smith ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); 14440781036SHong Zhang 14552e6d16bSBarry Smith ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 14640781036SHong Zhang ierr = ISDestroy(from);CHKERRQ(ierr); 14740781036SHong Zhang ierr = ISDestroy(to);CHKERRQ(ierr); 14874ed9c26SBarry Smith ierr = PetscFree2(sgarray,ec_owner);CHKERRQ(ierr); 14940781036SHong Zhang PetscFunctionReturn(0); 15040781036SHong Zhang } 15140781036SHong Zhang 15240781036SHong Zhang #undef __FUNCT__ 15340781036SHong Zhang #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm" 154dfbe8321SBarry Smith PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat mat) 15540781036SHong Zhang { 1562896befeSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 157a30f8f8cSSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 1586849ba73SBarry Smith PetscErrorCode ierr; 15913f74950SBarry Smith PetscInt i,j,*aj = B->j,ec = 0,*garray; 160d0f46423SBarry Smith PetscInt bs = mat->rmap->bs,*stmp; 161a30f8f8cSSatish Balay IS from,to; 162a30f8f8cSSatish Balay Vec gvec; 163a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE) 164a30f8f8cSSatish Balay PetscTable gid1_lid1; 165a30f8f8cSSatish Balay PetscTablePosition tpos; 16613f74950SBarry Smith PetscInt gid,lid; 1676f531f54SSatish Balay #else 16813f74950SBarry Smith PetscInt Nbs = baij->Nbs,*indices; 169a30f8f8cSSatish Balay #endif 170a30f8f8cSSatish Balay 171a30f8f8cSSatish Balay PetscFunctionBegin; 172a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE) 173a30f8f8cSSatish Balay /* use a table - Mark Adams */ 174a30f8f8cSSatish Balay PetscTableCreate(B->mbs,&gid1_lid1); 175a30f8f8cSSatish Balay for (i=0; i<B->mbs; i++) { 176a30f8f8cSSatish Balay for (j=0; j<B->ilen[i]; j++) { 17713f74950SBarry Smith PetscInt data,gid1 = aj[B->i[i]+j] + 1; 178a30f8f8cSSatish Balay ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 179a30f8f8cSSatish Balay if (!data) { 180a30f8f8cSSatish Balay /* one based table */ 181a30f8f8cSSatish Balay ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr); 182a30f8f8cSSatish Balay } 183a30f8f8cSSatish Balay } 184a30f8f8cSSatish Balay } 185a30f8f8cSSatish Balay /* form array of columns we need */ 18674ed9c26SBarry Smith ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr); 187a30f8f8cSSatish Balay ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 188a30f8f8cSSatish Balay while (tpos) { 189a30f8f8cSSatish Balay ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 190a30f8f8cSSatish Balay gid--; lid--; 191a30f8f8cSSatish Balay garray[lid] = gid; 192a30f8f8cSSatish Balay } 193a30f8f8cSSatish Balay ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 194a30f8f8cSSatish Balay ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 195a30f8f8cSSatish Balay for (i=0; i<ec; i++) { 196a30f8f8cSSatish Balay ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr); 197a30f8f8cSSatish Balay } 198a30f8f8cSSatish Balay /* compact out the extra columns in B */ 199a30f8f8cSSatish Balay for (i=0; i<B->mbs; i++) { 200a30f8f8cSSatish Balay for (j=0; j<B->ilen[i]; j++) { 20113f74950SBarry Smith PetscInt gid1 = aj[B->i[i] + j] + 1; 202a30f8f8cSSatish Balay ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 203a30f8f8cSSatish Balay lid --; 204a30f8f8cSSatish Balay aj[B->i[i]+j] = lid; 205a30f8f8cSSatish Balay } 206a30f8f8cSSatish Balay } 207a30f8f8cSSatish Balay B->nbs = ec; 208d0f46423SBarry Smith baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs; 20926283091SBarry Smith ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr); 2109c666560SBarry Smith ierr = PetscTableDestroy(gid1_lid1);CHKERRQ(ierr); 211a30f8f8cSSatish Balay /* Mark Adams */ 212a30f8f8cSSatish Balay #else 213a30f8f8cSSatish Balay /* For the first stab we make an array as long as the number of columns */ 214a30f8f8cSSatish Balay /* mark those columns that are in baij->B */ 21574ed9c26SBarry Smith ierr = PetscMalloc(Nbs*sizeof(PetscInt),&indices);CHKERRQ(ierr); 21613f74950SBarry Smith ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr); 217a30f8f8cSSatish Balay for (i=0; i<B->mbs; i++) { 218a30f8f8cSSatish Balay for (j=0; j<B->ilen[i]; j++) { 219a30f8f8cSSatish Balay if (!indices[aj[B->i[i] + j]]) ec++; 220a30f8f8cSSatish Balay indices[aj[B->i[i] + j] ] = 1; 221a30f8f8cSSatish Balay } 222a30f8f8cSSatish Balay } 223a30f8f8cSSatish Balay 224a30f8f8cSSatish Balay /* form array of columns we need */ 22574ed9c26SBarry Smith ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr); 226f3ef73ceSBarry Smith ec = 0; 227f3ef73ceSBarry Smith for (i=0; i<Nbs; i++) { 228f3ef73ceSBarry Smith if (indices[i]) { 229f3ef73ceSBarry Smith garray[ec++] = i; 230f3ef73ceSBarry Smith } 231f3ef73ceSBarry Smith } 232a30f8f8cSSatish Balay 233a30f8f8cSSatish Balay /* make indices now point into garray */ 234a30f8f8cSSatish Balay for (i=0; i<ec; i++) { 235a30f8f8cSSatish Balay indices[garray[i]] = i; 236a30f8f8cSSatish Balay } 237a30f8f8cSSatish Balay 238a30f8f8cSSatish Balay /* compact out the extra columns in B */ 239a30f8f8cSSatish Balay for (i=0; i<B->mbs; i++) { 240a30f8f8cSSatish Balay for (j=0; j<B->ilen[i]; j++) { 241a30f8f8cSSatish Balay aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 242a30f8f8cSSatish Balay } 243a30f8f8cSSatish Balay } 244a30f8f8cSSatish Balay B->nbs = ec; 245d0f46423SBarry Smith baij->B->cmap->n = ec*mat->rmap->bs; 246a30f8f8cSSatish Balay ierr = PetscFree(indices);CHKERRQ(ierr); 247a30f8f8cSSatish Balay #endif 248633e10c7SHong Zhang 249a30f8f8cSSatish Balay /* create local vector that is used to scatter into */ 250a30f8f8cSSatish Balay ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 251a30f8f8cSSatish Balay 252a30f8f8cSSatish Balay /* create two temporary index sets for building scatter-gather */ 253deff0451SBarry Smith ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 254a30f8f8cSSatish Balay 25574ed9c26SBarry Smith ierr = PetscMalloc(ec*sizeof(PetscInt),&stmp);CHKERRQ(ierr); 256e82e9f6bSBarry Smith for (i=0; i<ec; i++) { stmp[i] = i; } 257deff0451SBarry Smith ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr); 258a30f8f8cSSatish Balay 259a30f8f8cSSatish Balay /* create temporary global vector to generate scatter context */ 260d0f46423SBarry Smith ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr); 261a30f8f8cSSatish Balay ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 262b5eb4454SBarry Smith ierr = VecDestroy(gvec);CHKERRQ(ierr); 263a30f8f8cSSatish Balay 26452e6d16bSBarry Smith ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr); 26552e6d16bSBarry Smith ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr); 26652e6d16bSBarry Smith ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); 26752e6d16bSBarry Smith ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); 268a30f8f8cSSatish Balay baij->garray = garray; 26952e6d16bSBarry Smith ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 270a30f8f8cSSatish Balay ierr = ISDestroy(from);CHKERRQ(ierr); 271a30f8f8cSSatish Balay ierr = ISDestroy(to);CHKERRQ(ierr); 272a30f8f8cSSatish Balay PetscFunctionReturn(0); 273a30f8f8cSSatish Balay } 274a30f8f8cSSatish Balay 275a30f8f8cSSatish Balay /* 27601b2bd88SHong Zhang Takes the local part of an already assembled MPISBAIJ matrix 277a30f8f8cSSatish Balay and disassembles it. This is to allow new nonzeros into the matrix 278a30f8f8cSSatish Balay that require more communication in the matrix vector multiply. 279a30f8f8cSSatish Balay Thus certain data-structures must be rebuilt. 280a30f8f8cSSatish Balay 281a30f8f8cSSatish Balay Kind of slow! But that's what application programmers get when 282a30f8f8cSSatish Balay they are sloppy. 283a30f8f8cSSatish Balay */ 2844a2ae208SSatish Balay #undef __FUNCT__ 2854a2ae208SSatish Balay #define __FUNCT__ "DisAssemble_MPISBAIJ" 286dfbe8321SBarry Smith PetscErrorCode DisAssemble_MPISBAIJ(Mat A) 287a30f8f8cSSatish Balay { 2882896befeSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)A->data; 289a30f8f8cSSatish Balay Mat B = baij->B,Bnew; 290a30f8f8cSSatish Balay Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 291dfbe8321SBarry Smith PetscErrorCode ierr; 292d0f46423SBarry Smith PetscInt i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray; 293d0f46423SBarry Smith PetscInt k,bs=A->rmap->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->rmap->n; 294a30f8f8cSSatish Balay MatScalar *a = Bbaij->a; 29587828ca2SBarry Smith PetscScalar *atmp; 29665460251SBarry Smith #if defined(PETSC_USE_SCALAR_MAT_SINGLE) 29713f74950SBarry Smith PetscInt l; 298a30f8f8cSSatish Balay #endif 299a30f8f8cSSatish Balay 300a30f8f8cSSatish Balay PetscFunctionBegin; 30165460251SBarry Smith #if defined(PETSC_USE_SCALAR_MAT_SINGLE) 302d0f46423SBarry Smith ierr = PetscMalloc(A->rmap->bs*sizeof(PetscScalar),&atmp); 30382502324SSatish Balay #endif 304a30f8f8cSSatish Balay /* free stuff related to matrix-vec multiply */ 305b0a32e0cSBarry Smith ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 30601b2bd88SHong Zhang ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); 30701b2bd88SHong Zhang baij->lvec = 0; 30801b2bd88SHong Zhang ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); 30901b2bd88SHong Zhang baij->Mvctx = 0; 31001b2bd88SHong Zhang 31101b2bd88SHong Zhang ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 31201b2bd88SHong Zhang ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 31301b2bd88SHong Zhang baij->slvec0 = 0; 31401b2bd88SHong Zhang ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 31501b2bd88SHong Zhang ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 31601b2bd88SHong Zhang ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 31701b2bd88SHong Zhang baij->slvec1 = 0; 31801b2bd88SHong Zhang 319a30f8f8cSSatish Balay if (baij->colmap) { 320a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE) 3219c666560SBarry Smith ierr = PetscTableDestroy(baij->colmap); baij->colmap = 0;CHKERRQ(ierr); 322a30f8f8cSSatish Balay #else 323a30f8f8cSSatish Balay ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 324a30f8f8cSSatish Balay baij->colmap = 0; 32552e6d16bSBarry Smith ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr); 326a30f8f8cSSatish Balay #endif 327a30f8f8cSSatish Balay } 328a30f8f8cSSatish Balay 329a30f8f8cSSatish Balay /* make sure that B is assembled so we can access its values */ 330a30f8f8cSSatish Balay ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 331a30f8f8cSSatish Balay ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 332a30f8f8cSSatish Balay 333a30f8f8cSSatish Balay /* invent new B and copy stuff over */ 33413f74950SBarry Smith ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr); 335a30f8f8cSSatish Balay for (i=0; i<mbs; i++) { 336a30f8f8cSSatish Balay nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 337a30f8f8cSSatish Balay } 338f69a0ea3SMatthew Knepley ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr); 339f69a0ea3SMatthew Knepley ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 3407adad957SLisandro Dalcin ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr); 341d0f46423SBarry Smith ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr); 342a30f8f8cSSatish Balay ierr = PetscFree(nz);CHKERRQ(ierr); 343a30f8f8cSSatish Balay 34413f74950SBarry Smith ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 345a30f8f8cSSatish Balay for (i=0; i<mbs; i++) { 346a30f8f8cSSatish Balay rvals[0] = bs*i; 347a30f8f8cSSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 348a30f8f8cSSatish Balay for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 349a30f8f8cSSatish Balay col = garray[Bbaij->j[j]]*bs; 350a30f8f8cSSatish Balay for (k=0; k<bs; k++) { 35165460251SBarry Smith #if defined(PETSC_USE_SCALAR_MAT_SINGLE) 352a30f8f8cSSatish Balay for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l]; 353a30f8f8cSSatish Balay #else 35471730473SSatish Balay atmp = a+j*bs2 + k*bs; 355a30f8f8cSSatish Balay #endif 356c8407628SSatish Balay ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 357a30f8f8cSSatish Balay col++; 358a30f8f8cSSatish Balay } 359a30f8f8cSSatish Balay } 360a30f8f8cSSatish Balay } 36165460251SBarry Smith #if defined(PETSC_USE_SCALAR_MAT_SINGLE) 362a30f8f8cSSatish Balay ierr = PetscFree(atmp);CHKERRQ(ierr); 363a30f8f8cSSatish Balay #endif 364a30f8f8cSSatish Balay ierr = PetscFree(baij->garray);CHKERRQ(ierr); 365a30f8f8cSSatish Balay baij->garray = 0; 366a30f8f8cSSatish Balay ierr = PetscFree(rvals);CHKERRQ(ierr); 36752e6d16bSBarry Smith ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 368a30f8f8cSSatish Balay ierr = MatDestroy(B);CHKERRQ(ierr); 36952e6d16bSBarry Smith ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr); 370a30f8f8cSSatish Balay baij->B = Bnew; 371a30f8f8cSSatish Balay A->was_assembled = PETSC_FALSE; 372a30f8f8cSSatish Balay PetscFunctionReturn(0); 373a30f8f8cSSatish Balay } 374a30f8f8cSSatish Balay 375a30f8f8cSSatish Balay 376a30f8f8cSSatish Balay 377