123bdbc58SBarry Smith 2d5b485abSSatish Balay /* 3d5b485abSSatish Balay Routines to compute overlapping regions of a parallel MPI matrix 4d5b485abSSatish Balay and to find submatrices that were shared across processors. 5d5b485abSSatish Balay */ 6c6db04a5SJed Brown #include <../src/mat/impls/baij/mpi/mpibaij.h> 7c6db04a5SJed Brown #include <petscbt.h> 8d5b485abSSatish Balay 9b24ad042SBarry Smith static PetscErrorCode MatIncreaseOverlap_MPIBAIJ_Local(Mat,PetscInt,char**,PetscInt*,PetscInt**); 10b24ad042SBarry Smith static PetscErrorCode MatIncreaseOverlap_MPIBAIJ_Receive(Mat,PetscInt,PetscInt**,PetscInt**,PetscInt*); 1109573ac7SBarry Smith extern PetscErrorCode MatGetRow_MPIBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 1209573ac7SBarry Smith extern PetscErrorCode MatRestoreRow_MPIBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 13d5b485abSSatish Balay 144a2ae208SSatish Balay #undef __FUNCT__ 154a2ae208SSatish Balay #define __FUNCT__ "MatIncreaseOverlap_MPIBAIJ" 16b24ad042SBarry Smith PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat C,PetscInt imax,IS is[],PetscInt ov) 17d5b485abSSatish Balay { 186849ba73SBarry Smith PetscErrorCode ierr; 19d0f46423SBarry Smith PetscInt i,N=C->cmap->N, bs=C->rmap->bs; 2036f4e84dSSatish Balay IS *is_new; 2136f4e84dSSatish Balay 223a40ed3dSBarry Smith PetscFunctionBegin; 2382502324SSatish Balay ierr = PetscMalloc(imax*sizeof(IS),&is_new);CHKERRQ(ierr); 24df36731bSBarry Smith /* Convert the indices into block format */ 2505d8c843SHong Zhang ierr = ISCompressIndicesGeneral(N,C->rmap->n,bs,imax,is,is_new);CHKERRQ(ierr); 2626fbe8dcSKarl Rupp if (ov < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified\n"); 27d5b485abSSatish Balay for (i=0; i<ov; ++i) { 2836f4e84dSSatish Balay ierr = MatIncreaseOverlap_MPIBAIJ_Once(C,imax,is_new);CHKERRQ(ierr); 29d5b485abSSatish Balay } 306bf464f9SBarry Smith for (i=0; i<imax; i++) {ierr = ISDestroy(&is[i]);CHKERRQ(ierr);} 3105d8c843SHong Zhang ierr = ISExpandIndicesGeneral(N,N,bs,imax,is_new,is);CHKERRQ(ierr); 326bf464f9SBarry Smith for (i=0; i<imax; i++) {ierr = ISDestroy(&is_new[i]);CHKERRQ(ierr);} 33606d414cSSatish Balay ierr = PetscFree(is_new);CHKERRQ(ierr); 343a40ed3dSBarry Smith PetscFunctionReturn(0); 35d5b485abSSatish Balay } 36d5b485abSSatish Balay 37d5b485abSSatish Balay /* 38d5b485abSSatish Balay Sample message format: 39d5b485abSSatish Balay If a processor A wants processor B to process some elements corresponding 400e9b0e7eSHong Zhang to index sets is[1], is[5] 41d5b485abSSatish Balay mesg [0] = 2 (no of index sets in the mesg) 42d5b485abSSatish Balay ----------- 43d5b485abSSatish Balay mesg [1] = 1 => is[1] 44d5b485abSSatish Balay mesg [2] = sizeof(is[1]); 45d5b485abSSatish Balay ----------- 46d5b485abSSatish Balay mesg [5] = 5 => is[5] 47d5b485abSSatish Balay mesg [6] = sizeof(is[5]); 48d5b485abSSatish Balay ----------- 49d5b485abSSatish Balay mesg [7] 500e9b0e7eSHong Zhang mesg [n] data(is[1]) 51d5b485abSSatish Balay ----------- 52d5b485abSSatish Balay mesg[n+1] 53d5b485abSSatish Balay mesg[m] data(is[5]) 54d5b485abSSatish Balay ----------- 55d5b485abSSatish Balay 56d5b485abSSatish Balay Notes: 57d5b485abSSatish Balay nrqs - no of requests sent (or to be sent out) 58d5b485abSSatish Balay nrqr - no of requests recieved (which have to be or which have been processed 59d5b485abSSatish Balay */ 604a2ae208SSatish Balay #undef __FUNCT__ 614a2ae208SSatish Balay #define __FUNCT__ "MatIncreaseOverlap_MPIBAIJ_Once" 62db41cccfSHong Zhang PetscErrorCode MatIncreaseOverlap_MPIBAIJ_Once(Mat C,PetscInt imax,IS is[]) 63d5b485abSSatish Balay { 64df36731bSBarry Smith Mat_MPIBAIJ *c = (Mat_MPIBAIJ*)C->data; 655d0c19d7SBarry Smith const PetscInt **idx,*idx_i; 6624c049a4SHong Zhang PetscInt *n,*w3,*w4,**data,len; 676849ba73SBarry Smith PetscErrorCode ierr; 68b24ad042SBarry Smith PetscMPIInt size,rank,tag1,tag2,*w2,*w1,nrqr; 69245d216aSHong Zhang PetscInt Mbs,i,j,k,**rbuf,row,proc=-1,nrqs,msz,**outdat,**ptr; 708f9f447aSHong Zhang PetscInt *ctr,*pa,*tmp,*isz,*isz1,**xdata,**rbuf2,*d_p; 71b24ad042SBarry Smith PetscMPIInt *onodes1,*olengths1,*onodes2,*olengths2; 72f1af5d2fSBarry Smith PetscBT *table; 73d5b485abSSatish Balay MPI_Comm comm; 74d5b485abSSatish Balay MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2; 75d5b485abSSatish Balay MPI_Status *s_status,*recv_status; 768f9f447aSHong Zhang char *t_p; 77d5b485abSSatish Balay 783a40ed3dSBarry Smith PetscFunctionBegin; 79ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr); 80d5b485abSSatish Balay size = c->size; 81d5b485abSSatish Balay rank = c->rank; 82df36731bSBarry Smith Mbs = c->Mbs; 83d5b485abSSatish Balay 84c7dd2924SSatish Balay ierr = PetscObjectGetNewTag((PetscObject)C,&tag1);CHKERRQ(ierr); 85c7dd2924SSatish Balay ierr = PetscObjectGetNewTag((PetscObject)C,&tag2);CHKERRQ(ierr); 86c7dd2924SSatish Balay 87*dcca6d9dSJed Brown ierr = PetscMalloc2(imax+1,&idx,imax,&n);CHKERRQ(ierr); 8824c049a4SHong Zhang 89d5b485abSSatish Balay for (i=0; i<imax; i++) { 90d5b485abSSatish Balay ierr = ISGetIndices(is[i],&idx[i]);CHKERRQ(ierr); 91b9b97703SBarry Smith ierr = ISGetLocalSize(is[i],&n[i]);CHKERRQ(ierr); 92d5b485abSSatish Balay } 93d5b485abSSatish Balay 94d5b485abSSatish Balay /* evaluate communication - mesg to who,length of mesg, and buffer space 95d5b485abSSatish Balay required. Based on this, buffers are allocated, and data copied into them*/ 96*dcca6d9dSJed Brown ierr = PetscMalloc4(size,&w1,size,&w2,size,&w3,size,&w4);CHKERRQ(ierr); 9723bdbc58SBarry Smith ierr = PetscMemzero(w1,size*sizeof(PetscMPIInt));CHKERRQ(ierr); 9823bdbc58SBarry Smith ierr = PetscMemzero(w2,size*sizeof(PetscMPIInt));CHKERRQ(ierr); 9923bdbc58SBarry Smith ierr = PetscMemzero(w3,size*sizeof(PetscInt));CHKERRQ(ierr); 100d5b485abSSatish Balay for (i=0; i<imax; i++) { 101b24ad042SBarry Smith ierr = PetscMemzero(w4,size*sizeof(PetscInt));CHKERRQ(ierr); /* initialise work vector*/ 102d5b485abSSatish Balay idx_i = idx[i]; 103d5b485abSSatish Balay len = n[i]; 104d5b485abSSatish Balay for (j=0; j<len; j++) { 105d5b485abSSatish Balay row = idx_i[j]; 106f23aa3ddSBarry Smith if (row < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index set cannot have negative entries"); 107ca31476aSJed Brown ierr = PetscLayoutFindOwner(C->rmap,row*C->rmap->bs,&proc);CHKERRQ(ierr); 108d5b485abSSatish Balay w4[proc]++; 109d5b485abSSatish Balay } 110d5b485abSSatish Balay for (j=0; j<size; j++) { 111d5b485abSSatish Balay if (w4[j]) { w1[j] += w4[j]; w3[j]++;} 112d5b485abSSatish Balay } 113d5b485abSSatish Balay } 114d5b485abSSatish Balay 115d5b485abSSatish Balay nrqs = 0; /* no of outgoing messages */ 116d5b485abSSatish Balay msz = 0; /* total mesg length (for all proc */ 1170e9b0e7eSHong Zhang w1[rank] = 0; /* no mesg sent to itself */ 118d5b485abSSatish Balay w3[rank] = 0; 119d5b485abSSatish Balay for (i=0; i<size; i++) { 120d5b485abSSatish Balay if (w1[i]) {w2[i] = 1; nrqs++;} /* there exists a message to proc i */ 121d5b485abSSatish Balay } 122d5b485abSSatish Balay /* pa - is list of processors to communicate with */ 123b24ad042SBarry Smith ierr = PetscMalloc((nrqs+1)*sizeof(PetscInt),&pa);CHKERRQ(ierr); 124d5b485abSSatish Balay for (i=0,j=0; i<size; i++) { 125d5b485abSSatish Balay if (w1[i]) {pa[j] = i; j++;} 126d5b485abSSatish Balay } 127d5b485abSSatish Balay 128d5b485abSSatish Balay /* Each message would have a header = 1 + 2*(no of IS) + data */ 129d5b485abSSatish Balay for (i=0; i<nrqs; i++) { 130d5b485abSSatish Balay j = pa[i]; 131d5b485abSSatish Balay w1[j] += w2[j] + 2*w3[j]; 132d5b485abSSatish Balay msz += w1[j]; 133d5b485abSSatish Balay } 134d5b485abSSatish Balay 135c7dd2924SSatish Balay /* Determine the number of messages to expect, their lengths, from from-ids */ 136a96d083dSSatish Balay ierr = PetscGatherNumberOfMessages(comm,w2,w1,&nrqr);CHKERRQ(ierr); 137c7dd2924SSatish Balay ierr = PetscGatherMessageLengths(comm,nrqs,nrqr,w1,&onodes1,&olengths1);CHKERRQ(ierr); 138d5b485abSSatish Balay 139c7dd2924SSatish Balay /* Now post the Irecvs corresponding to these messages */ 140c7dd2924SSatish Balay ierr = PetscPostIrecvInt(comm,tag1,nrqr,onodes1,olengths1,&rbuf,&r_waits1);CHKERRQ(ierr); 141d5b485abSSatish Balay 142d5b485abSSatish Balay /* Allocate Memory for outgoing messages */ 143*dcca6d9dSJed Brown ierr = PetscMalloc4(size,&outdat,size,&ptr,msz,&tmp,size,&ctr);CHKERRQ(ierr); 14423bdbc58SBarry Smith ierr = PetscMemzero(outdat,size*sizeof(PetscInt*));CHKERRQ(ierr); 14523bdbc58SBarry Smith ierr = PetscMemzero(ptr,size*sizeof(PetscInt*));CHKERRQ(ierr); 146d5b485abSSatish Balay { 147b24ad042SBarry Smith PetscInt *iptr = tmp,ict = 0; 148d5b485abSSatish Balay for (i=0; i<nrqs; i++) { 149d5b485abSSatish Balay j = pa[i]; 150d5b485abSSatish Balay iptr += ict; 151d5b485abSSatish Balay outdat[j] = iptr; 152d5b485abSSatish Balay ict = w1[j]; 153d5b485abSSatish Balay } 154d5b485abSSatish Balay } 155d5b485abSSatish Balay 156d5b485abSSatish Balay /* Form the outgoing messages */ 157d5b485abSSatish Balay /*plug in the headers*/ 158d5b485abSSatish Balay for (i=0; i<nrqs; i++) { 159d5b485abSSatish Balay j = pa[i]; 160d5b485abSSatish Balay outdat[j][0] = 0; 161b24ad042SBarry Smith ierr = PetscMemzero(outdat[j]+1,2*w3[j]*sizeof(PetscInt));CHKERRQ(ierr); 162d5b485abSSatish Balay ptr[j] = outdat[j] + 2*w3[j] + 1; 163d5b485abSSatish Balay } 164d5b485abSSatish Balay 165d5b485abSSatish Balay /* Memory for doing local proc's work*/ 166d5b485abSSatish Balay { 167*dcca6d9dSJed Brown ierr = PetscMalloc5(imax,&table, imax,&data, imax,&isz, Mbs*imax,&d_p, (Mbs/PETSC_BITS_PER_BYTE+1)*imax,&t_p);CHKERRQ(ierr); 1688f9f447aSHong Zhang ierr = PetscMemzero(table,imax*sizeof(PetscBT));CHKERRQ(ierr); 1698f9f447aSHong Zhang ierr = PetscMemzero(data,imax*sizeof(PetscInt*));CHKERRQ(ierr); 1708f9f447aSHong Zhang ierr = PetscMemzero(isz,imax*sizeof(PetscInt));CHKERRQ(ierr); 1718f9f447aSHong Zhang ierr = PetscMemzero(d_p,Mbs*imax*sizeof(PetscInt));CHKERRQ(ierr); 1728f9f447aSHong Zhang ierr = PetscMemzero(t_p,(Mbs/PETSC_BITS_PER_BYTE+1)*imax*sizeof(char));CHKERRQ(ierr); 173d5b485abSSatish Balay 174bbd702dbSSatish Balay for (i=0; i<imax; i++) { 175b6410449SSatish Balay table[i] = t_p + (Mbs/PETSC_BITS_PER_BYTE+1)*i; 176bbd702dbSSatish Balay data[i] = d_p + (Mbs)*i; 177d5b485abSSatish Balay } 178d5b485abSSatish Balay } 179d5b485abSSatish Balay 180d5b485abSSatish Balay /* Parse the IS and update local tables and the outgoing buf with the data*/ 181d5b485abSSatish Balay { 182b24ad042SBarry Smith PetscInt n_i,*data_i,isz_i,*outdat_j,ctr_j; 183f1af5d2fSBarry Smith PetscBT table_i; 184d5b485abSSatish Balay 185d5b485abSSatish Balay for (i=0; i<imax; i++) { 186b24ad042SBarry Smith ierr = PetscMemzero(ctr,size*sizeof(PetscInt));CHKERRQ(ierr); 187d5b485abSSatish Balay n_i = n[i]; 188d5b485abSSatish Balay table_i = table[i]; 189d5b485abSSatish Balay idx_i = idx[i]; 190d5b485abSSatish Balay data_i = data[i]; 191d5b485abSSatish Balay isz_i = isz[i]; 192d5b485abSSatish Balay for (j=0; j<n_i; j++) { /* parse the indices of each IS */ 193d5b485abSSatish Balay row = idx_i[j]; 194ca31476aSJed Brown ierr = PetscLayoutFindOwner(C->rmap,row*C->rmap->bs,&proc);CHKERRQ(ierr); 195d5b485abSSatish Balay if (proc != rank) { /* copy to the outgoing buffer */ 196d5b485abSSatish Balay ctr[proc]++; 197d5b485abSSatish Balay *ptr[proc] = row; 198d5b485abSSatish Balay ptr[proc]++; 199d6b45a43SBarry Smith } else { /* Update the local table */ 20026fbe8dcSKarl Rupp if (!PetscBTLookupSet(table_i,row)) data_i[isz_i++] = row; 201d5b485abSSatish Balay } 202d5b485abSSatish Balay } 203d5b485abSSatish Balay /* Update the headers for the current IS */ 204d5b485abSSatish Balay for (j=0; j<size; j++) { /* Can Optimise this loop by using pa[] */ 205d5b485abSSatish Balay if ((ctr_j = ctr[j])) { 206d5b485abSSatish Balay outdat_j = outdat[j]; 207d5b485abSSatish Balay k = ++outdat_j[0]; 208d5b485abSSatish Balay outdat_j[2*k] = ctr_j; 209d5b485abSSatish Balay outdat_j[2*k-1] = i; 210d5b485abSSatish Balay } 211d5b485abSSatish Balay } 212d5b485abSSatish Balay isz[i] = isz_i; 213d5b485abSSatish Balay } 214d5b485abSSatish Balay } 215d5b485abSSatish Balay 216d5b485abSSatish Balay /* Now post the sends */ 217b0a32e0cSBarry Smith ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);CHKERRQ(ierr); 218d5b485abSSatish Balay for (i=0; i<nrqs; ++i) { 219d5b485abSSatish Balay j = pa[i]; 220b24ad042SBarry Smith ierr = MPI_Isend(outdat[j],w1[j],MPIU_INT,j,tag1,comm,s_waits1+i);CHKERRQ(ierr); 221d5b485abSSatish Balay } 222d5b485abSSatish Balay 223d5b485abSSatish Balay /* No longer need the original indices*/ 224d5b485abSSatish Balay for (i=0; i<imax; ++i) { 225d5b485abSSatish Balay ierr = ISRestoreIndices(is[i],idx+i);CHKERRQ(ierr); 226d5b485abSSatish Balay } 22724c049a4SHong Zhang ierr = PetscFree2(idx,n);CHKERRQ(ierr); 228d5b485abSSatish Balay 229d5b485abSSatish Balay for (i=0; i<imax; ++i) { 2306bf464f9SBarry Smith ierr = ISDestroy(&is[i]);CHKERRQ(ierr); 231d5b485abSSatish Balay } 232d5b485abSSatish Balay 233d5b485abSSatish Balay /* Do Local work*/ 234df36731bSBarry Smith ierr = MatIncreaseOverlap_MPIBAIJ_Local(C,imax,table,isz,data);CHKERRQ(ierr); 235d5b485abSSatish Balay 236d5b485abSSatish Balay /* Receive messages*/ 237b0a32e0cSBarry Smith ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&recv_status);CHKERRQ(ierr); 2380c468ba9SBarry Smith if (nrqr) {ierr = MPI_Waitall(nrqr,r_waits1,recv_status);CHKERRQ(ierr);} 239d5b485abSSatish Balay 240b0a32e0cSBarry Smith ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status);CHKERRQ(ierr); 2410c468ba9SBarry Smith if (nrqs) {ierr = MPI_Waitall(nrqs,s_waits1,s_status);CHKERRQ(ierr);} 242d5b485abSSatish Balay 243d5b485abSSatish Balay /* Phase 1 sends are complete - deallocate buffers */ 24423bdbc58SBarry Smith ierr = PetscFree4(outdat,ptr,tmp,ctr);CHKERRQ(ierr); 24523bdbc58SBarry Smith ierr = PetscFree4(w1,w2,w3,w4);CHKERRQ(ierr); 246d5b485abSSatish Balay 247b24ad042SBarry Smith ierr = PetscMalloc((nrqr+1)*sizeof(PetscInt*),&xdata);CHKERRQ(ierr); 248b24ad042SBarry Smith ierr = PetscMalloc((nrqr+1)*sizeof(PetscInt),&isz1);CHKERRQ(ierr); 249df36731bSBarry Smith ierr = MatIncreaseOverlap_MPIBAIJ_Receive(C,nrqr,rbuf,xdata,isz1);CHKERRQ(ierr); 250c05d87d6SBarry Smith ierr = PetscFree(rbuf[0]);CHKERRQ(ierr); 251606d414cSSatish Balay ierr = PetscFree(rbuf);CHKERRQ(ierr); 252d5b485abSSatish Balay 253d5b485abSSatish Balay /* Send the data back*/ 254d5b485abSSatish Balay /* Do a global reduction to know the buffer space req for incoming messages*/ 255d5b485abSSatish Balay { 256b24ad042SBarry Smith PetscMPIInt *rw1; 257d5b485abSSatish Balay 258b24ad042SBarry Smith ierr = PetscMalloc(size*sizeof(PetscInt),&rw1);CHKERRQ(ierr); 259b24ad042SBarry Smith ierr = PetscMemzero(rw1,size*sizeof(PetscInt));CHKERRQ(ierr); 260c7dd2924SSatish Balay 261d5b485abSSatish Balay for (i=0; i<nrqr; ++i) { 262d5b485abSSatish Balay proc = recv_status[i].MPI_SOURCE; 263e32f2f54SBarry Smith if (proc != onodes1[i]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPI_SOURCE mismatch"); 264d5b485abSSatish Balay rw1[proc] = isz1[i]; 265d5b485abSSatish Balay } 266d5b485abSSatish Balay 267c7dd2924SSatish Balay ierr = PetscFree(onodes1);CHKERRQ(ierr); 268c7dd2924SSatish Balay ierr = PetscFree(olengths1);CHKERRQ(ierr); 269d5b485abSSatish Balay 270c7dd2924SSatish Balay /* Determine the number of messages to expect, their lengths, from from-ids */ 271c7dd2924SSatish Balay ierr = PetscGatherMessageLengths(comm,nrqr,nrqs,rw1,&onodes2,&olengths2);CHKERRQ(ierr); 27203512dc5SSatish Balay ierr = PetscFree(rw1);CHKERRQ(ierr); 273c7dd2924SSatish Balay } 274c7dd2924SSatish Balay /* Now post the Irecvs corresponding to these messages */ 275c7dd2924SSatish Balay ierr = PetscPostIrecvInt(comm,tag2,nrqs,onodes2,olengths2,&rbuf2,&r_waits2);CHKERRQ(ierr); 276d5b485abSSatish Balay 277d5b485abSSatish Balay /* Now post the sends */ 278b0a32e0cSBarry Smith ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);CHKERRQ(ierr); 279d5b485abSSatish Balay for (i=0; i<nrqr; ++i) { 280d5b485abSSatish Balay j = recv_status[i].MPI_SOURCE; 281b24ad042SBarry Smith ierr = MPI_Isend(xdata[i],isz1[i],MPIU_INT,j,tag2,comm,s_waits2+i);CHKERRQ(ierr); 282d5b485abSSatish Balay } 283d5b485abSSatish Balay 284d5b485abSSatish Balay /* receive work done on other processors*/ 285d5b485abSSatish Balay { 286b24ad042SBarry Smith PetscMPIInt idex; 287b24ad042SBarry Smith PetscInt is_no,ct1,max,*rbuf2_i,isz_i,*data_i,jmax; 288f1af5d2fSBarry Smith PetscBT table_i; 289d5b485abSSatish Balay MPI_Status *status2; 290d5b485abSSatish Balay 291169449a0SSatish Balay ierr = PetscMalloc((PetscMax(nrqr,nrqs)+1)*sizeof(MPI_Status),&status2);CHKERRQ(ierr); 292d5b485abSSatish Balay for (i=0; i<nrqs; ++i) { 293999d9058SBarry Smith ierr = MPI_Waitany(nrqs,r_waits2,&idex,status2+i);CHKERRQ(ierr); 294d5b485abSSatish Balay /* Process the message*/ 295999d9058SBarry Smith rbuf2_i = rbuf2[idex]; 296d5b485abSSatish Balay ct1 = 2*rbuf2_i[0]+1; 297999d9058SBarry Smith jmax = rbuf2[idex][0]; 298d5b485abSSatish Balay for (j=1; j<=jmax; j++) { 299d5b485abSSatish Balay max = rbuf2_i[2*j]; 300d5b485abSSatish Balay is_no = rbuf2_i[2*j-1]; 301d5b485abSSatish Balay isz_i = isz[is_no]; 302d5b485abSSatish Balay data_i = data[is_no]; 303d5b485abSSatish Balay table_i = table[is_no]; 304d5b485abSSatish Balay for (k=0; k<max; k++,ct1++) { 305d5b485abSSatish Balay row = rbuf2_i[ct1]; 30626fbe8dcSKarl Rupp if (!PetscBTLookupSet(table_i,row)) data_i[isz_i++] = row; 307d5b485abSSatish Balay } 308d5b485abSSatish Balay isz[is_no] = isz_i; 309d5b485abSSatish Balay } 310d5b485abSSatish Balay } 3110c468ba9SBarry Smith if (nrqr) {ierr = MPI_Waitall(nrqr,s_waits2,status2);CHKERRQ(ierr);} 312606d414cSSatish Balay ierr = PetscFree(status2);CHKERRQ(ierr); 313d5b485abSSatish Balay } 314d5b485abSSatish Balay 315d5b485abSSatish Balay for (i=0; i<imax; ++i) { 31670b3c8c7SBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF,isz[i],data[i],PETSC_COPY_VALUES,is+i);CHKERRQ(ierr); 317d5b485abSSatish Balay } 318d5b485abSSatish Balay 319c7dd2924SSatish Balay 320c7dd2924SSatish Balay ierr = PetscFree(onodes2);CHKERRQ(ierr); 321c7dd2924SSatish Balay ierr = PetscFree(olengths2);CHKERRQ(ierr); 322c7dd2924SSatish Balay 323606d414cSSatish Balay ierr = PetscFree(pa);CHKERRQ(ierr); 324c05d87d6SBarry Smith ierr = PetscFree(rbuf2[0]);CHKERRQ(ierr); 325606d414cSSatish Balay ierr = PetscFree(rbuf2);CHKERRQ(ierr); 326606d414cSSatish Balay ierr = PetscFree(s_waits1);CHKERRQ(ierr); 327606d414cSSatish Balay ierr = PetscFree(r_waits1);CHKERRQ(ierr); 328606d414cSSatish Balay ierr = PetscFree(s_waits2);CHKERRQ(ierr); 329606d414cSSatish Balay ierr = PetscFree(r_waits2);CHKERRQ(ierr); 3308f9f447aSHong Zhang ierr = PetscFree5(table,data,isz,d_p,t_p);CHKERRQ(ierr); 331606d414cSSatish Balay ierr = PetscFree(s_status);CHKERRQ(ierr); 332606d414cSSatish Balay ierr = PetscFree(recv_status);CHKERRQ(ierr); 333606d414cSSatish Balay ierr = PetscFree(xdata[0]);CHKERRQ(ierr); 334606d414cSSatish Balay ierr = PetscFree(xdata);CHKERRQ(ierr); 335606d414cSSatish Balay ierr = PetscFree(isz1);CHKERRQ(ierr); 3363a40ed3dSBarry Smith PetscFunctionReturn(0); 337d5b485abSSatish Balay } 338d5b485abSSatish Balay 3394a2ae208SSatish Balay #undef __FUNCT__ 3404a2ae208SSatish Balay #define __FUNCT__ "MatIncreaseOverlap_MPIBAIJ_Local" 341d5b485abSSatish Balay /* 342df36731bSBarry Smith MatIncreaseOverlap_MPIBAIJ_Local - Called by MatincreaseOverlap, to do 343d5b485abSSatish Balay the work on the local processor. 344d5b485abSSatish Balay 345d5b485abSSatish Balay Inputs: 346df36731bSBarry Smith C - MAT_MPIBAIJ; 347d5b485abSSatish Balay imax - total no of index sets processed at a time; 348df36731bSBarry Smith table - an array of char - size = Mbs bits. 349d5b485abSSatish Balay 350d5b485abSSatish Balay Output: 3510e9b0e7eSHong Zhang isz - array containing the count of the solution elements corresponding 352d5b485abSSatish Balay to each index set; 353d5b485abSSatish Balay data - pointer to the solutions 354d5b485abSSatish Balay */ 355b24ad042SBarry Smith static PetscErrorCode MatIncreaseOverlap_MPIBAIJ_Local(Mat C,PetscInt imax,PetscBT *table,PetscInt *isz,PetscInt **data) 356d5b485abSSatish Balay { 357df36731bSBarry Smith Mat_MPIBAIJ *c = (Mat_MPIBAIJ*)C->data; 358d5b485abSSatish Balay Mat A = c->A,B = c->B; 359df36731bSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)B->data; 360b24ad042SBarry Smith PetscInt start,end,val,max,rstart,cstart,*ai,*aj; 361b24ad042SBarry Smith PetscInt *bi,*bj,*garray,i,j,k,row,*data_i,isz_i; 362f1af5d2fSBarry Smith PetscBT table_i; 363d5b485abSSatish Balay 3643a40ed3dSBarry Smith PetscFunctionBegin; 365899cda47SBarry Smith rstart = c->rstartbs; 366899cda47SBarry Smith cstart = c->cstartbs; 367d5b485abSSatish Balay ai = a->i; 368df36731bSBarry Smith aj = a->j; 369d5b485abSSatish Balay bi = b->i; 370df36731bSBarry Smith bj = b->j; 371d5b485abSSatish Balay garray = c->garray; 372d5b485abSSatish Balay 373d5b485abSSatish Balay 374d5b485abSSatish Balay for (i=0; i<imax; i++) { 375d5b485abSSatish Balay data_i = data[i]; 376d5b485abSSatish Balay table_i = table[i]; 377d5b485abSSatish Balay isz_i = isz[i]; 378d5b485abSSatish Balay for (j=0,max=isz[i]; j<max; j++) { 379d5b485abSSatish Balay row = data_i[j] - rstart; 380d5b485abSSatish Balay start = ai[row]; 381d5b485abSSatish Balay end = ai[row+1]; 382d5b485abSSatish Balay for (k=start; k<end; k++) { /* Amat */ 383df36731bSBarry Smith val = aj[k] + cstart; 38426fbe8dcSKarl Rupp if (!PetscBTLookupSet(table_i,val)) data_i[isz_i++] = val; 385d5b485abSSatish Balay } 386d5b485abSSatish Balay start = bi[row]; 387d5b485abSSatish Balay end = bi[row+1]; 388d5b485abSSatish Balay for (k=start; k<end; k++) { /* Bmat */ 389df36731bSBarry Smith val = garray[bj[k]]; 39026fbe8dcSKarl Rupp if (!PetscBTLookupSet(table_i,val)) data_i[isz_i++] = val; 391d5b485abSSatish Balay } 392d5b485abSSatish Balay } 393d5b485abSSatish Balay isz[i] = isz_i; 394d5b485abSSatish Balay } 3953a40ed3dSBarry Smith PetscFunctionReturn(0); 396d5b485abSSatish Balay } 3974a2ae208SSatish Balay #undef __FUNCT__ 3984a2ae208SSatish Balay #define __FUNCT__ "MatIncreaseOverlap_MPIBAIJ_Receive" 399d5b485abSSatish Balay /* 400df36731bSBarry Smith MatIncreaseOverlap_MPIBAIJ_Receive - Process the recieved messages, 401d5b485abSSatish Balay and return the output 402d5b485abSSatish Balay 403d5b485abSSatish Balay Input: 404d5b485abSSatish Balay C - the matrix 405d5b485abSSatish Balay nrqr - no of messages being processed. 406d5b485abSSatish Balay rbuf - an array of pointers to the recieved requests 407d5b485abSSatish Balay 408d5b485abSSatish Balay Output: 409d5b485abSSatish Balay xdata - array of messages to be sent back 410d5b485abSSatish Balay isz1 - size of each message 411d5b485abSSatish Balay 412a8c7a070SBarry Smith For better efficiency perhaps we should malloc separately each xdata[i], 413d5b485abSSatish Balay then if a remalloc is required we need only copy the data for that one row 4140e9b0e7eSHong Zhang rather than all previous rows as it is now where a single large chunck of 415d5b485abSSatish Balay memory is used. 416d5b485abSSatish Balay 417d5b485abSSatish Balay */ 418b24ad042SBarry Smith static PetscErrorCode MatIncreaseOverlap_MPIBAIJ_Receive(Mat C,PetscInt nrqr,PetscInt **rbuf,PetscInt **xdata,PetscInt * isz1) 419d5b485abSSatish Balay { 420df36731bSBarry Smith Mat_MPIBAIJ *c = (Mat_MPIBAIJ*)C->data; 421d5b485abSSatish Balay Mat A = c->A,B = c->B; 422df36731bSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)B->data; 4236849ba73SBarry Smith PetscErrorCode ierr; 424b24ad042SBarry Smith PetscInt rstart,cstart,*ai,*aj,*bi,*bj,*garray,i,j,k; 425b24ad042SBarry Smith PetscInt row,total_sz,ct,ct1,ct2,ct3,mem_estimate,oct2,l,start,end; 426d2a212eaSBarry Smith PetscInt val,max1,max2,Mbs,no_malloc =0,*tmp,new_estimate,ctr; 427b24ad042SBarry Smith PetscInt *rbuf_i,kmax,rbuf_0; 428f1af5d2fSBarry Smith PetscBT xtable; 429d5b485abSSatish Balay 4303a40ed3dSBarry Smith PetscFunctionBegin; 431df36731bSBarry Smith Mbs = c->Mbs; 432899cda47SBarry Smith rstart = c->rstartbs; 433899cda47SBarry Smith cstart = c->cstartbs; 434d5b485abSSatish Balay ai = a->i; 435df36731bSBarry Smith aj = a->j; 436d5b485abSSatish Balay bi = b->i; 437df36731bSBarry Smith bj = b->j; 438d5b485abSSatish Balay garray = c->garray; 439d5b485abSSatish Balay 440d5b485abSSatish Balay 441d5b485abSSatish Balay for (i=0,ct=0,total_sz=0; i<nrqr; ++i) { 442d5b485abSSatish Balay rbuf_i = rbuf[i]; 443d5b485abSSatish Balay rbuf_0 = rbuf_i[0]; 444d5b485abSSatish Balay ct += rbuf_0; 44526fbe8dcSKarl Rupp for (j=1; j<=rbuf_0; j++) total_sz += rbuf_i[2*j]; 446d5b485abSSatish Balay } 447d5b485abSSatish Balay 448701b8814SBarry Smith if (c->Mbs) max1 = ct*(a->nz +b->nz)/c->Mbs; 449701b8814SBarry Smith else max1 = 1; 450d5b485abSSatish Balay mem_estimate = 3*((total_sz > max1 ? total_sz : max1)+1); 451b24ad042SBarry Smith ierr = PetscMalloc(mem_estimate*sizeof(PetscInt),&xdata[0]);CHKERRQ(ierr); 452d5b485abSSatish Balay ++no_malloc; 45353b8de81SBarry Smith ierr = PetscBTCreate(Mbs,&xtable);CHKERRQ(ierr); 454b24ad042SBarry Smith ierr = PetscMemzero(isz1,nrqr*sizeof(PetscInt));CHKERRQ(ierr); 455d5b485abSSatish Balay 456d5b485abSSatish Balay ct3 = 0; 457d5b485abSSatish Balay for (i=0; i<nrqr; i++) { /* for easch mesg from proc i */ 458d5b485abSSatish Balay rbuf_i = rbuf[i]; 459d5b485abSSatish Balay rbuf_0 = rbuf_i[0]; 460d5b485abSSatish Balay ct1 = 2*rbuf_0+1; 461d5b485abSSatish Balay ct2 = ct1; 462d5b485abSSatish Balay ct3 += ct1; 463d5b485abSSatish Balay for (j=1; j<=rbuf_0; j++) { /* for each IS from proc i*/ 4646831982aSBarry Smith ierr = PetscBTMemzero(Mbs,xtable);CHKERRQ(ierr); 465d5b485abSSatish Balay oct2 = ct2; 466d5b485abSSatish Balay kmax = rbuf_i[2*j]; 467d5b485abSSatish Balay for (k=0; k<kmax; k++,ct1++) { 468d5b485abSSatish Balay row = rbuf_i[ct1]; 469f1af5d2fSBarry Smith if (!PetscBTLookupSet(xtable,row)) { 470d5b485abSSatish Balay if (!(ct3 < mem_estimate)) { 471b24ad042SBarry Smith new_estimate = (PetscInt)(1.5*mem_estimate)+1; 472b24ad042SBarry Smith ierr = PetscMalloc(new_estimate * sizeof(PetscInt),&tmp);CHKERRQ(ierr); 473b24ad042SBarry Smith ierr = PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(PetscInt));CHKERRQ(ierr); 474606d414cSSatish Balay ierr = PetscFree(xdata[0]);CHKERRQ(ierr); 475d5b485abSSatish Balay xdata[0] = tmp; 476d5b485abSSatish Balay mem_estimate = new_estimate; ++no_malloc; 47726fbe8dcSKarl Rupp for (ctr=1; ctr<=i; ctr++) xdata[ctr] = xdata[ctr-1] + isz1[ctr-1]; 478d5b485abSSatish Balay } 479d5b485abSSatish Balay xdata[i][ct2++] = row; 480d5b485abSSatish Balay ct3++; 481d5b485abSSatish Balay } 482d5b485abSSatish Balay } 483d5b485abSSatish Balay for (k=oct2,max2=ct2; k<max2; k++) { 484d5b485abSSatish Balay row = xdata[i][k] - rstart; 485d5b485abSSatish Balay start = ai[row]; 486d5b485abSSatish Balay end = ai[row+1]; 487d5b485abSSatish Balay for (l=start; l<end; l++) { 488df36731bSBarry Smith val = aj[l] + cstart; 489f1af5d2fSBarry Smith if (!PetscBTLookupSet(xtable,val)) { 490d5b485abSSatish Balay if (!(ct3 < mem_estimate)) { 491b24ad042SBarry Smith new_estimate = (PetscInt)(1.5*mem_estimate)+1; 492b24ad042SBarry Smith ierr = PetscMalloc(new_estimate * sizeof(PetscInt),&tmp);CHKERRQ(ierr); 493b24ad042SBarry Smith ierr = PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(PetscInt));CHKERRQ(ierr); 494606d414cSSatish Balay ierr = PetscFree(xdata[0]);CHKERRQ(ierr); 495d5b485abSSatish Balay xdata[0] = tmp; 496d5b485abSSatish Balay mem_estimate = new_estimate; ++no_malloc; 49726fbe8dcSKarl Rupp for (ctr=1; ctr<=i; ctr++) xdata[ctr] = xdata[ctr-1] + isz1[ctr-1]; 498d5b485abSSatish Balay } 499d5b485abSSatish Balay xdata[i][ct2++] = val; 500d5b485abSSatish Balay ct3++; 501d5b485abSSatish Balay } 502d5b485abSSatish Balay } 503d5b485abSSatish Balay start = bi[row]; 504d5b485abSSatish Balay end = bi[row+1]; 505d5b485abSSatish Balay for (l=start; l<end; l++) { 506df36731bSBarry Smith val = garray[bj[l]]; 507f1af5d2fSBarry Smith if (!PetscBTLookupSet(xtable,val)) { 508d5b485abSSatish Balay if (!(ct3 < mem_estimate)) { 509b24ad042SBarry Smith new_estimate = (PetscInt)(1.5*mem_estimate)+1; 510b24ad042SBarry Smith ierr = PetscMalloc(new_estimate * sizeof(PetscInt),&tmp);CHKERRQ(ierr); 511b24ad042SBarry Smith ierr = PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(PetscInt));CHKERRQ(ierr); 512606d414cSSatish Balay ierr = PetscFree(xdata[0]);CHKERRQ(ierr); 513d5b485abSSatish Balay xdata[0] = tmp; 514d5b485abSSatish Balay mem_estimate = new_estimate; ++no_malloc; 51526fbe8dcSKarl Rupp for (ctr =1; ctr <=i; ctr++) xdata[ctr] = xdata[ctr-1] + isz1[ctr-1]; 516d5b485abSSatish Balay } 517d5b485abSSatish Balay xdata[i][ct2++] = val; 518d5b485abSSatish Balay ct3++; 519d5b485abSSatish Balay } 520d5b485abSSatish Balay } 521d5b485abSSatish Balay } 522d5b485abSSatish Balay /* Update the header*/ 523d5b485abSSatish Balay xdata[i][2*j] = ct2 - oct2; /* Undo the vector isz1 and use only a var*/ 524d5b485abSSatish Balay xdata[i][2*j-1] = rbuf_i[2*j-1]; 525d5b485abSSatish Balay } 526d5b485abSSatish Balay xdata[i][0] = rbuf_0; 527d5b485abSSatish Balay xdata[i+1] = xdata[i] + ct2; 528d5b485abSSatish Balay isz1[i] = ct2; /* size of each message */ 529d5b485abSSatish Balay } 53094bacf5dSBarry Smith ierr = PetscBTDestroy(&xtable);CHKERRQ(ierr); 5311e2582c4SBarry Smith ierr = PetscInfo3(C,"Allocated %D bytes, required %D, no of mallocs = %D\n",mem_estimate,ct3,no_malloc);CHKERRQ(ierr); 5323a40ed3dSBarry Smith PetscFunctionReturn(0); 533d5b485abSSatish Balay } 534d5b485abSSatish Balay 5354a2ae208SSatish Balay #undef __FUNCT__ 5364a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrices_MPIBAIJ" 537b24ad042SBarry Smith PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[]) 538d5b485abSSatish Balay { 53936f4e84dSSatish Balay IS *isrow_new,*iscol_new; 540cf4f527aSSatish Balay Mat_MPIBAIJ *c = (Mat_MPIBAIJ*)C->data; 5416849ba73SBarry Smith PetscErrorCode ierr; 5424da72fa9SHong Zhang PetscInt nmax,nstages_local,nstages,i,pos,max_no,ncol,nrow,N=C->cmap->N,bs=C->rmap->bs; 5434da72fa9SHong Zhang PetscBool colflag,*allcolumns,*allrows; 544a2feddc5SSatish Balay 5453a40ed3dSBarry Smith PetscFunctionBegin; 54629dcf524SDmitry Karpeev /* Currently, unsorted column indices will result in inverted column indices in the resulting submatrices. */ 54729dcf524SDmitry Karpeev for (i = 0; i < ismax; ++i) { 54829dcf524SDmitry Karpeev PetscBool sorted; 54929dcf524SDmitry Karpeev ierr = ISSorted(iscol[i], &sorted);CHKERRQ(ierr); 550c7e6e2c7SJed Brown if (!sorted) SETERRQ1(((PetscObject)iscol[i])->comm, PETSC_ERR_SUP, "Column index set %D not sorted", i); 55129dcf524SDmitry Karpeev } 55229dcf524SDmitry Karpeev /* The compression and expansion should be avoided. Doesn't point 55329dcf524SDmitry Karpeev out errors, might change the indices, hence buggey */ 554*dcca6d9dSJed Brown ierr = PetscMalloc2(ismax+1,&isrow_new,ismax+1,&iscol_new);CHKERRQ(ierr); 55505d8c843SHong Zhang ierr = ISCompressIndicesGeneral(N,C->rmap->n,bs,ismax,isrow,isrow_new);CHKERRQ(ierr); 55605d8c843SHong Zhang ierr = ISCompressIndicesGeneral(N,C->cmap->n,bs,ismax,iscol,iscol_new);CHKERRQ(ierr); 557cf4f527aSSatish Balay 558307b7a18SHong Zhang /* Check for special case: each processor gets entire matrix columns */ 559*dcca6d9dSJed Brown ierr = PetscMalloc2(ismax+1,&allcolumns,ismax+1,&allrows);CHKERRQ(ierr); 560307b7a18SHong Zhang for (i=0; i<ismax; i++) { 561307b7a18SHong Zhang ierr = ISIdentity(iscol[i],&colflag);CHKERRQ(ierr); 562307b7a18SHong Zhang ierr = ISGetLocalSize(iscol[i],&ncol);CHKERRQ(ierr); 56326fbe8dcSKarl Rupp if (colflag && ncol == C->cmap->N) allcolumns[i] = PETSC_TRUE; 56426fbe8dcSKarl Rupp else allcolumns[i] = PETSC_FALSE; 5654da72fa9SHong Zhang 5664da72fa9SHong Zhang ierr = ISIdentity(isrow[i],&colflag);CHKERRQ(ierr); 5674da72fa9SHong Zhang ierr = ISGetLocalSize(isrow[i],&nrow);CHKERRQ(ierr); 56826fbe8dcSKarl Rupp if (colflag && nrow == C->rmap->N) allrows[i] = PETSC_TRUE; 56926fbe8dcSKarl Rupp else allrows[i] = PETSC_FALSE; 570307b7a18SHong Zhang } 571307b7a18SHong Zhang 572cf4f527aSSatish Balay /* Allocate memory to hold all the submatrices */ 573cf4f527aSSatish Balay if (scall != MAT_REUSE_MATRIX) { 57482502324SSatish Balay ierr = PetscMalloc((ismax+1)*sizeof(Mat),submat);CHKERRQ(ierr); 575cf4f527aSSatish Balay } 576cf4f527aSSatish Balay /* Determine the number of stages through which submatrices are done */ 577b24ad042SBarry Smith nmax = 20*1000000 / (c->Nbs * sizeof(PetscInt)); 578329f5518SBarry Smith if (!nmax) nmax = 1; 579cf4f527aSSatish Balay nstages_local = ismax/nmax + ((ismax % nmax) ? 1 : 0); 580cf4f527aSSatish Balay 581653e4784SBarry Smith /* Make sure every processor loops through the nstages */ 582ce94432eSBarry Smith ierr = MPI_Allreduce(&nstages_local,&nstages,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)C));CHKERRQ(ierr); 583cf4f527aSSatish Balay for (i=0,pos=0; i<nstages; i++) { 584cf4f527aSSatish Balay if (pos+nmax <= ismax) max_no = nmax; 585cf4f527aSSatish Balay else if (pos == ismax) max_no = 0; 586cf4f527aSSatish Balay else max_no = ismax-pos; 5874da72fa9SHong Zhang ierr = MatGetSubMatrices_MPIBAIJ_local(C,max_no,isrow_new+pos,iscol_new+pos,scall,allrows+pos,allcolumns+pos,*submat+pos);CHKERRQ(ierr); 588cf4f527aSSatish Balay pos += max_no; 589cf4f527aSSatish Balay } 59036f4e84dSSatish Balay 59136f4e84dSSatish Balay for (i=0; i<ismax; i++) { 5926bf464f9SBarry Smith ierr = ISDestroy(&isrow_new[i]);CHKERRQ(ierr); 5936bf464f9SBarry Smith ierr = ISDestroy(&iscol_new[i]);CHKERRQ(ierr); 59436f4e84dSSatish Balay } 59523bdbc58SBarry Smith ierr = PetscFree2(isrow_new,iscol_new);CHKERRQ(ierr); 5964da72fa9SHong Zhang ierr = PetscFree2(allcolumns,allrows);CHKERRQ(ierr); 5973a40ed3dSBarry Smith PetscFunctionReturn(0); 598a2feddc5SSatish Balay } 599a2feddc5SSatish Balay 600233c2ff6SSatish Balay #if defined(PETSC_USE_CTABLE) 6014a2ae208SSatish Balay #undef __FUNCT__ 6024a2ae208SSatish Balay #define __FUNCT__ "PetscGetProc" 603e005ede5SBarry Smith PetscErrorCode PetscGetProc(const PetscInt row, const PetscMPIInt size, const PetscInt proc_gnode[], PetscMPIInt *rank) 604233c2ff6SSatish Balay { 605e005ede5SBarry Smith PetscInt nGlobalNd = proc_gnode[size]; 6064dc2109aSBarry Smith PetscMPIInt fproc; 6074dc2109aSBarry Smith PetscErrorCode ierr; 608233c2ff6SSatish Balay 609233c2ff6SSatish Balay PetscFunctionBegin; 6104dc2109aSBarry Smith ierr = PetscMPIIntCast((PetscInt)(((float)row * (float)size / (float)nGlobalNd + 0.5)),&fproc);CHKERRQ(ierr); 61123ce1328SBarry Smith if (fproc > size) fproc = size; 612e005ede5SBarry Smith while (row < proc_gnode[fproc] || row >= proc_gnode[fproc+1]) { 613e005ede5SBarry Smith if (row < proc_gnode[fproc]) fproc--; 614233c2ff6SSatish Balay else fproc++; 615233c2ff6SSatish Balay } 616e005ede5SBarry Smith *rank = fproc; 617233c2ff6SSatish Balay PetscFunctionReturn(0); 618233c2ff6SSatish Balay } 619233c2ff6SSatish Balay #endif 620233c2ff6SSatish Balay 621a2feddc5SSatish Balay /* -------------------------------------------------------------------------*/ 622b24ad042SBarry Smith /* This code is used for BAIJ and SBAIJ matrices (unfortunate dependency) */ 6234a2ae208SSatish Balay #undef __FUNCT__ 6244a2ae208SSatish Balay #define __FUNCT__ "MatGetSubMatrices_MPIBAIJ_local" 6254da72fa9SHong Zhang PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,PetscBool *allrows,PetscBool *allcolumns,Mat *submats) 626a2feddc5SSatish Balay { 627df36731bSBarry Smith Mat_MPIBAIJ *c = (Mat_MPIBAIJ*)C->data; 628cf4f527aSSatish Balay Mat A = c->A; 629df36731bSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)c->B->data,*mat; 6305d0c19d7SBarry Smith const PetscInt **irow,**icol,*irow_i; 6315d0c19d7SBarry Smith PetscInt *nrow,*ncol,*w3,*w4,start; 6326849ba73SBarry Smith PetscErrorCode ierr; 6339405f653SBarry Smith PetscMPIInt size,tag0,tag1,tag2,tag3,*w1,*w2,nrqr,idex,end,proc; 6349405f653SBarry Smith PetscInt **sbuf1,**sbuf2,rank,i,j,k,l,ct1,ct2,**rbuf1,row; 635b24ad042SBarry Smith PetscInt nrqs,msz,**ptr,*req_size,*ctr,*pa,*tmp,tcol; 636b24ad042SBarry Smith PetscInt **rbuf3,*req_source,**sbuf_aj,**rbuf2,max1,max2; 6375d0c19d7SBarry Smith PetscInt **lens,is_no,ncols,*cols,mat_i,*mat_j,tmp2,jmax; 638052f0c41SBarry Smith PetscInt ctr_j,*sbuf1_j,*sbuf_aj_i,*rbuf1_i,kmax,*lens_i; 639d0f46423SBarry Smith PetscInt bs =C->rmap->bs,bs2=c->bs2,*a_j=a->j,*b_j=b->j,*cworkA,*cworkB; 640899cda47SBarry Smith PetscInt cstart = c->cstartbs,nzA,nzB,*a_i=a->i,*b_i=b->i,imark; 641899cda47SBarry Smith PetscInt *bmap = c->garray,ctmp,rstart=c->rstartbs; 6427a868f3eSHong Zhang MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2,*r_waits3,*s_waits3; 6437a868f3eSHong Zhang MPI_Status *r_status1,*r_status2,*s_status1,*s_status3,*s_status2,*r_status3; 644d5b485abSSatish Balay MPI_Comm comm; 645ace3abfcSBarry Smith PetscBool flag; 646b24ad042SBarry Smith PetscMPIInt *onodes1,*olengths1; 6477a868f3eSHong Zhang PetscBool ijonly=c->ijonly; /* private flag indicates only matrix data structures are requested */ 64826fbe8dcSKarl Rupp 6497a868f3eSHong Zhang /* variables below are used for the matrix numerical values - case of !ijonly */ 6507a868f3eSHong Zhang MPI_Request *r_waits4,*s_waits4; 6517a868f3eSHong Zhang MPI_Status *r_status4,*s_status4; 6520298fd71SBarry Smith MatScalar **rbuf4,**sbuf_aa,*vals,*mat_a = NULL,*sbuf_aa_i,*vworkA = NULL,*vworkB = NULL; 6537a868f3eSHong Zhang MatScalar *a_a=a->a,*b_a=b->a; 654c7dd2924SSatish Balay 65580d608c0SSatish Balay #if defined(PETSC_USE_CTABLE) 656b24ad042SBarry Smith PetscInt tt; 6570298fd71SBarry Smith PetscTable *rmap,*cmap,rmap_i,cmap_i=NULL; 658233c2ff6SSatish Balay #else 6590298fd71SBarry Smith PetscInt **cmap,*cmap_i=NULL,*rtable,*rmap_i,**rmap, Mbs = c->Mbs; 660233c2ff6SSatish Balay #endif 661d5b485abSSatish Balay 6623a40ed3dSBarry Smith PetscFunctionBegin; 663ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr); 6647adad957SLisandro Dalcin tag0 = ((PetscObject)C)->tag; 665d5b485abSSatish Balay size = c->size; 666d5b485abSSatish Balay rank = c->rank; 667d5b485abSSatish Balay 66834e6ae18SSatish Balay /* Get some new tags to keep the communication clean */ 66934e6ae18SSatish Balay ierr = PetscObjectGetNewTag((PetscObject)C,&tag1);CHKERRQ(ierr); 67034e6ae18SSatish Balay ierr = PetscObjectGetNewTag((PetscObject)C,&tag2);CHKERRQ(ierr); 67134e6ae18SSatish Balay ierr = PetscObjectGetNewTag((PetscObject)C,&tag3);CHKERRQ(ierr); 67234e6ae18SSatish Balay 673052f0c41SBarry Smith #if defined(PETSC_USE_CTABLE) 674*dcca6d9dSJed Brown ierr = PetscMalloc4(ismax,&irow,ismax,&icol,ismax,&nrow,ismax,&ncol);CHKERRQ(ierr); 675052f0c41SBarry Smith #else 676*dcca6d9dSJed Brown ierr = PetscMalloc5(ismax,&irow,ismax,&icol,ismax,&nrow,ismax,&ncol,Mbs+1,&rtable);CHKERRQ(ierr); 677d5b485abSSatish Balay /* Create hash table for the mapping :row -> proc*/ 678d5b485abSSatish Balay for (i=0,j=0; i<size; i++) { 67982a7e548SBarry Smith jmax = C->rmap->range[i+1]/bs; 68026fbe8dcSKarl Rupp for (; j<jmax; j++) rtable[j] = i; 681d5b485abSSatish Balay } 682233c2ff6SSatish Balay #endif 683233c2ff6SSatish Balay 684233c2ff6SSatish Balay for (i=0; i<ismax; i++) { 6854da72fa9SHong Zhang if (allrows[i]) { 6860298fd71SBarry Smith irow[i] = NULL; 6874da72fa9SHong Zhang nrow[i] = C->rmap->N/bs; 6884da72fa9SHong Zhang } else { 689233c2ff6SSatish Balay ierr = ISGetIndices(isrow[i],&irow[i]);CHKERRQ(ierr); 690233c2ff6SSatish Balay ierr = ISGetLocalSize(isrow[i],&nrow[i]);CHKERRQ(ierr); 6914da72fa9SHong Zhang } 6924da72fa9SHong Zhang 693307b7a18SHong Zhang if (allcolumns[i]) { 6940298fd71SBarry Smith icol[i] = NULL; 695307b7a18SHong Zhang ncol[i] = C->cmap->N/bs; 696307b7a18SHong Zhang } else { 697307b7a18SHong Zhang ierr = ISGetIndices(iscol[i],&icol[i]);CHKERRQ(ierr); 698233c2ff6SSatish Balay ierr = ISGetLocalSize(iscol[i],&ncol[i]);CHKERRQ(ierr); 699233c2ff6SSatish Balay } 700307b7a18SHong Zhang } 701d5b485abSSatish Balay 702d5b485abSSatish Balay /* evaluate communication - mesg to who,length of mesg,and buffer space 703d5b485abSSatish Balay required. Based on this, buffers are allocated, and data copied into them*/ 704*dcca6d9dSJed Brown ierr = PetscMalloc4(size,&w1,size,&w2,size,&w3,size,&w4);CHKERRQ(ierr); 70523bdbc58SBarry Smith ierr = PetscMemzero(w1,size*sizeof(PetscMPIInt));CHKERRQ(ierr); 70623bdbc58SBarry Smith ierr = PetscMemzero(w2,size*sizeof(PetscMPIInt));CHKERRQ(ierr); 70723bdbc58SBarry Smith ierr = PetscMemzero(w3,size*sizeof(PetscInt));CHKERRQ(ierr); 708d5b485abSSatish Balay for (i=0; i<ismax; i++) { 709b24ad042SBarry Smith ierr = PetscMemzero(w4,size*sizeof(PetscInt));CHKERRQ(ierr); /* initialise work vector*/ 710d5b485abSSatish Balay jmax = nrow[i]; 711d5b485abSSatish Balay irow_i = irow[i]; 712d5b485abSSatish Balay for (j=0; j<jmax; j++) { 71326fbe8dcSKarl Rupp if (allrows[i]) row = j; 71426fbe8dcSKarl Rupp else row = irow_i[j]; 71526fbe8dcSKarl Rupp 716233c2ff6SSatish Balay #if defined(PETSC_USE_CTABLE) 717899cda47SBarry Smith ierr = PetscGetProc(row,size,c->rangebs,&proc);CHKERRQ(ierr); 718233c2ff6SSatish Balay #else 719d5b485abSSatish Balay proc = rtable[row]; 720233c2ff6SSatish Balay #endif 721d5b485abSSatish Balay w4[proc]++; 722d5b485abSSatish Balay } 723d5b485abSSatish Balay for (j=0; j<size; j++) { 724d5b485abSSatish Balay if (w4[j]) { w1[j] += w4[j]; w3[j]++;} 725d5b485abSSatish Balay } 726d5b485abSSatish Balay } 727d5b485abSSatish Balay 728d5b485abSSatish Balay nrqs = 0; /* no of outgoing messages */ 729e757655aSSatish Balay msz = 0; /* total mesg length for all proc */ 730d5b485abSSatish Balay w1[rank] = 0; /* no mesg sent to intself */ 731d5b485abSSatish Balay w3[rank] = 0; 732d5b485abSSatish Balay for (i=0; i<size; i++) { 733d5b485abSSatish Balay if (w1[i]) { w2[i] = 1; nrqs++;} /* there exists a message to proc i */ 734d5b485abSSatish Balay } 735b24ad042SBarry Smith ierr = PetscMalloc((nrqs+1)*sizeof(PetscInt),&pa);CHKERRQ(ierr); /*(proc -array)*/ 736d5b485abSSatish Balay for (i=0,j=0; i<size; i++) { 737d5b485abSSatish Balay if (w1[i]) { pa[j] = i; j++; } 738d5b485abSSatish Balay } 739d5b485abSSatish Balay 740d5b485abSSatish Balay /* Each message would have a header = 1 + 2*(no of IS) + data */ 741d5b485abSSatish Balay for (i=0; i<nrqs; i++) { 742d5b485abSSatish Balay j = pa[i]; 743d5b485abSSatish Balay w1[j] += w2[j] + 2* w3[j]; 744d5b485abSSatish Balay msz += w1[j]; 745d5b485abSSatish Balay } 746d5b485abSSatish Balay 747c7dd2924SSatish Balay /* Determine the number of messages to expect, their lengths, from from-ids */ 748a96d083dSSatish Balay ierr = PetscGatherNumberOfMessages(comm,w2,w1,&nrqr);CHKERRQ(ierr); 749c7dd2924SSatish Balay ierr = PetscGatherMessageLengths(comm,nrqs,nrqr,w1,&onodes1,&olengths1);CHKERRQ(ierr); 750d5b485abSSatish Balay 751c7dd2924SSatish Balay /* Now post the Irecvs corresponding to these messages */ 752c7dd2924SSatish Balay ierr = PetscPostIrecvInt(comm,tag0,nrqr,onodes1,olengths1,&rbuf1,&r_waits1);CHKERRQ(ierr); 753c7dd2924SSatish Balay 754c7dd2924SSatish Balay ierr = PetscFree(onodes1);CHKERRQ(ierr); 755c7dd2924SSatish Balay ierr = PetscFree(olengths1);CHKERRQ(ierr); 756d5b485abSSatish Balay 757d5b485abSSatish Balay /* Allocate Memory for outgoing messages */ 758*dcca6d9dSJed Brown ierr = PetscMalloc4(size,&sbuf1,size,&ptr,2*msz,&tmp,size,&ctr);CHKERRQ(ierr); 75923bdbc58SBarry Smith ierr = PetscMemzero(sbuf1,size*sizeof(PetscInt*));CHKERRQ(ierr); 76023bdbc58SBarry Smith ierr = PetscMemzero(ptr,size*sizeof(PetscInt*));CHKERRQ(ierr); 761d5b485abSSatish Balay { 762b24ad042SBarry Smith PetscInt *iptr = tmp,ict = 0; 763d5b485abSSatish Balay for (i=0; i<nrqs; i++) { 764d5b485abSSatish Balay j = pa[i]; 765d5b485abSSatish Balay iptr += ict; 766d5b485abSSatish Balay sbuf1[j] = iptr; 767d5b485abSSatish Balay ict = w1[j]; 768d5b485abSSatish Balay } 769d5b485abSSatish Balay } 770d5b485abSSatish Balay 771d5b485abSSatish Balay /* Form the outgoing messages */ 772d5b485abSSatish Balay /* Initialise the header space */ 773d5b485abSSatish Balay for (i=0; i<nrqs; i++) { 774d5b485abSSatish Balay j = pa[i]; 775d5b485abSSatish Balay sbuf1[j][0] = 0; 776b24ad042SBarry Smith ierr = PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(PetscInt));CHKERRQ(ierr); 777d5b485abSSatish Balay ptr[j] = sbuf1[j] + 2*w3[j] + 1; 778d5b485abSSatish Balay } 779d5b485abSSatish Balay 780d5b485abSSatish Balay /* Parse the isrow and copy data into outbuf */ 781d5b485abSSatish Balay for (i=0; i<ismax; i++) { 782b24ad042SBarry Smith ierr = PetscMemzero(ctr,size*sizeof(PetscInt));CHKERRQ(ierr); 783d5b485abSSatish Balay irow_i = irow[i]; 784d5b485abSSatish Balay jmax = nrow[i]; 785d5b485abSSatish Balay for (j=0; j<jmax; j++) { /* parse the indices of each IS */ 78626fbe8dcSKarl Rupp if (allrows[i]) row = j; 78726fbe8dcSKarl Rupp else row = irow_i[j]; 78826fbe8dcSKarl Rupp 789233c2ff6SSatish Balay #if defined(PETSC_USE_CTABLE) 790899cda47SBarry Smith ierr = PetscGetProc(row,size,c->rangebs,&proc);CHKERRQ(ierr); 791233c2ff6SSatish Balay #else 792d5b485abSSatish Balay proc = rtable[row]; 793233c2ff6SSatish Balay #endif 794d5b485abSSatish Balay if (proc != rank) { /* copy to the outgoing buf*/ 795d5b485abSSatish Balay ctr[proc]++; 796d5b485abSSatish Balay *ptr[proc] = row; 797d5b485abSSatish Balay ptr[proc]++; 798d5b485abSSatish Balay } 799d5b485abSSatish Balay } 800d5b485abSSatish Balay /* Update the headers for the current IS */ 801d5b485abSSatish Balay for (j=0; j<size; j++) { /* Can Optimise this loop too */ 80206ef35abSSatish Balay if ((ctr_j = ctr[j])) { 803d5b485abSSatish Balay sbuf1_j = sbuf1[j]; 804d5b485abSSatish Balay k = ++sbuf1_j[0]; 805d5b485abSSatish Balay sbuf1_j[2*k] = ctr_j; 806d5b485abSSatish Balay sbuf1_j[2*k-1] = i; 807d5b485abSSatish Balay } 808d5b485abSSatish Balay } 809d5b485abSSatish Balay } 810d5b485abSSatish Balay 811d5b485abSSatish Balay /* Now post the sends */ 812b0a32e0cSBarry Smith ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);CHKERRQ(ierr); 813d5b485abSSatish Balay for (i=0; i<nrqs; ++i) { 814d5b485abSSatish Balay j = pa[i]; 815b24ad042SBarry Smith ierr = MPI_Isend(sbuf1[j],w1[j],MPIU_INT,j,tag0,comm,s_waits1+i);CHKERRQ(ierr); 816d5b485abSSatish Balay } 817d5b485abSSatish Balay 818d5b485abSSatish Balay /* Post Recieves to capture the buffer size */ 819b0a32e0cSBarry Smith ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits2);CHKERRQ(ierr); 820b24ad042SBarry Smith ierr = PetscMalloc((nrqs+1)*sizeof(PetscInt*),&rbuf2);CHKERRQ(ierr); 821d5b485abSSatish Balay rbuf2[0] = tmp + msz; 822d5b485abSSatish Balay for (i=1; i<nrqs; ++i) { 823d5b485abSSatish Balay j = pa[i]; 824d5b485abSSatish Balay rbuf2[i] = rbuf2[i-1]+w1[pa[i-1]]; 825d5b485abSSatish Balay } 826d5b485abSSatish Balay for (i=0; i<nrqs; ++i) { 827d5b485abSSatish Balay j = pa[i]; 828b24ad042SBarry Smith ierr = MPI_Irecv(rbuf2[i],w1[j],MPIU_INT,j,tag1,comm,r_waits2+i);CHKERRQ(ierr); 829d5b485abSSatish Balay } 830d5b485abSSatish Balay 831d5b485abSSatish Balay /* Send to other procs the buf size they should allocate */ 832d5b485abSSatish Balay 833d5b485abSSatish Balay /* Receive messages*/ 834b0a32e0cSBarry Smith ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);CHKERRQ(ierr); 835b0a32e0cSBarry Smith ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status1);CHKERRQ(ierr); 836*dcca6d9dSJed Brown ierr = PetscMalloc3(nrqr+1,&sbuf2,nrqr,&req_size,nrqr,&req_source);CHKERRQ(ierr); 837d5b485abSSatish Balay { 838df36731bSBarry Smith Mat_SeqBAIJ *sA = (Mat_SeqBAIJ*)c->A->data,*sB = (Mat_SeqBAIJ*)c->B->data; 839b24ad042SBarry Smith PetscInt *sAi = sA->i,*sBi = sB->i,id,*sbuf2_i; 840d5b485abSSatish Balay 841d5b485abSSatish Balay for (i=0; i<nrqr; ++i) { 842999d9058SBarry Smith ierr = MPI_Waitany(nrqr,r_waits1,&idex,r_status1+i);CHKERRQ(ierr); 84326fbe8dcSKarl Rupp 844999d9058SBarry Smith req_size[idex] = 0; 845999d9058SBarry Smith rbuf1_i = rbuf1[idex]; 846d5b485abSSatish Balay start = 2*rbuf1_i[0] + 1; 847b24ad042SBarry Smith ierr = MPI_Get_count(r_status1+i,MPIU_INT,&end);CHKERRQ(ierr); 848b24ad042SBarry Smith ierr = PetscMalloc(end*sizeof(PetscInt),&sbuf2[idex]);CHKERRQ(ierr); 849999d9058SBarry Smith sbuf2_i = sbuf2[idex]; 850d5b485abSSatish Balay for (j=start; j<end; j++) { 851d5b485abSSatish Balay id = rbuf1_i[j] - rstart; 852d5b485abSSatish Balay ncols = sAi[id+1] - sAi[id] + sBi[id+1] - sBi[id]; 853d5b485abSSatish Balay sbuf2_i[j] = ncols; 854999d9058SBarry Smith req_size[idex] += ncols; 855d5b485abSSatish Balay } 856999d9058SBarry Smith req_source[idex] = r_status1[i].MPI_SOURCE; 857d5b485abSSatish Balay /* form the header */ 858999d9058SBarry Smith sbuf2_i[0] = req_size[idex]; 85926fbe8dcSKarl Rupp for (j=1; j<start; j++) sbuf2_i[j] = rbuf1_i[j]; 860b24ad042SBarry Smith ierr = MPI_Isend(sbuf2_i,end,MPIU_INT,req_source[idex],tag1,comm,s_waits2+i);CHKERRQ(ierr); 861d5b485abSSatish Balay } 862d5b485abSSatish Balay } 863606d414cSSatish Balay ierr = PetscFree(r_status1);CHKERRQ(ierr); 864606d414cSSatish Balay ierr = PetscFree(r_waits1);CHKERRQ(ierr); 865d5b485abSSatish Balay 866d5b485abSSatish Balay /* recv buffer sizes */ 867d5b485abSSatish Balay /* Receive messages*/ 868b24ad042SBarry Smith ierr = PetscMalloc((nrqs+1)*sizeof(PetscInt*),&rbuf3);CHKERRQ(ierr); 869b0a32e0cSBarry Smith ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits3);CHKERRQ(ierr); 870b0a32e0cSBarry Smith ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status2);CHKERRQ(ierr); 8717a868f3eSHong Zhang if (!ijonly) { 8727a868f3eSHong Zhang ierr = PetscMalloc((nrqs+1)*sizeof(MatScalar*),&rbuf4);CHKERRQ(ierr); 8737a868f3eSHong Zhang ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits4);CHKERRQ(ierr); 8747a868f3eSHong Zhang } 875d5b485abSSatish Balay 876d5b485abSSatish Balay for (i=0; i<nrqs; ++i) { 877999d9058SBarry Smith ierr = MPI_Waitany(nrqs,r_waits2,&idex,r_status2+i);CHKERRQ(ierr); 878b24ad042SBarry Smith ierr = PetscMalloc(rbuf2[idex][0]*sizeof(PetscInt),&rbuf3[idex]);CHKERRQ(ierr); 879b24ad042SBarry Smith ierr = MPI_Irecv(rbuf3[idex],rbuf2[idex][0],MPIU_INT,r_status2[i].MPI_SOURCE,tag2,comm,r_waits3+idex);CHKERRQ(ierr); 8807a868f3eSHong Zhang if (!ijonly) { 8817a868f3eSHong Zhang ierr = PetscMalloc(rbuf2[idex][0]*bs2*sizeof(MatScalar),&rbuf4[idex]);CHKERRQ(ierr); 882b24ad042SBarry Smith ierr = MPI_Irecv(rbuf4[idex],rbuf2[idex][0]*bs2,MPIU_MATSCALAR,r_status2[i].MPI_SOURCE,tag3,comm,r_waits4+idex);CHKERRQ(ierr); 883d5b485abSSatish Balay } 8847a868f3eSHong Zhang } 885606d414cSSatish Balay ierr = PetscFree(r_status2);CHKERRQ(ierr); 886606d414cSSatish Balay ierr = PetscFree(r_waits2);CHKERRQ(ierr); 887d5b485abSSatish Balay 888d5b485abSSatish Balay /* Wait on sends1 and sends2 */ 889b0a32e0cSBarry Smith ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status1);CHKERRQ(ierr); 890b0a32e0cSBarry Smith ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status2);CHKERRQ(ierr); 891d5b485abSSatish Balay 8920c468ba9SBarry Smith if (nrqs) {ierr = MPI_Waitall(nrqs,s_waits1,s_status1);CHKERRQ(ierr);} 8930c468ba9SBarry Smith if (nrqr) {ierr = MPI_Waitall(nrqr,s_waits2,s_status2);CHKERRQ(ierr);} 894606d414cSSatish Balay ierr = PetscFree(s_status1);CHKERRQ(ierr); 895606d414cSSatish Balay ierr = PetscFree(s_status2);CHKERRQ(ierr); 896606d414cSSatish Balay ierr = PetscFree(s_waits1);CHKERRQ(ierr); 897606d414cSSatish Balay ierr = PetscFree(s_waits2);CHKERRQ(ierr); 898d5b485abSSatish Balay 899d5b485abSSatish Balay /* Now allocate buffers for a->j, and send them off */ 900b24ad042SBarry Smith ierr = PetscMalloc((nrqr+1)*sizeof(PetscInt*),&sbuf_aj);CHKERRQ(ierr); 901d5b485abSSatish Balay for (i=0,j=0; i<nrqr; i++) j += req_size[i]; 902b24ad042SBarry Smith ierr = PetscMalloc((j+1)*sizeof(PetscInt),&sbuf_aj[0]);CHKERRQ(ierr); 903d5b485abSSatish Balay for (i=1; i<nrqr; i++) sbuf_aj[i] = sbuf_aj[i-1] + req_size[i-1]; 904d5b485abSSatish Balay 905b0a32e0cSBarry Smith ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits3);CHKERRQ(ierr); 906d5b485abSSatish Balay { 907d5b485abSSatish Balay for (i=0; i<nrqr; i++) { 908d5b485abSSatish Balay rbuf1_i = rbuf1[i]; 909d5b485abSSatish Balay sbuf_aj_i = sbuf_aj[i]; 910d5b485abSSatish Balay ct1 = 2*rbuf1_i[0] + 1; 911d5b485abSSatish Balay ct2 = 0; 912d5b485abSSatish Balay for (j=1,max1=rbuf1_i[0]; j<=max1; j++) { 913d5b485abSSatish Balay kmax = rbuf1[i][2*j]; 914d5b485abSSatish Balay for (k=0; k<kmax; k++,ct1++) { 915e8e0db45SSatish Balay row = rbuf1_i[ct1] - rstart; 916d5b485abSSatish Balay nzA = a_i[row+1] - a_i[row]; nzB = b_i[row+1] - b_i[row]; 917d5b485abSSatish Balay ncols = nzA + nzB; 918d5b485abSSatish Balay cworkA = a_j + a_i[row]; cworkB = b_j + b_i[row]; 919d5b485abSSatish Balay 920d5b485abSSatish Balay /* load the column indices for this row into cols*/ 921d5b485abSSatish Balay cols = sbuf_aj_i + ct2; 922d5b485abSSatish Balay for (l=0; l<nzB; l++) { 923d5b485abSSatish Balay if ((ctmp = bmap[cworkB[l]]) < cstart) cols[l] = ctmp; 924d5b485abSSatish Balay else break; 925d5b485abSSatish Balay } 926d5b485abSSatish Balay imark = l; 927d5b485abSSatish Balay for (l=0; l<nzA; l++) cols[imark+l] = cstart + cworkA[l]; 928d5b485abSSatish Balay for (l=imark; l<nzB; l++) cols[nzA+l] = bmap[cworkB[l]]; 929d5b485abSSatish Balay ct2 += ncols; 930d5b485abSSatish Balay } 931d5b485abSSatish Balay } 932b24ad042SBarry Smith ierr = MPI_Isend(sbuf_aj_i,req_size[i],MPIU_INT,req_source[i],tag2,comm,s_waits3+i);CHKERRQ(ierr); 933d5b485abSSatish Balay } 934d5b485abSSatish Balay } 935b0a32e0cSBarry Smith ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status3);CHKERRQ(ierr); 936b0a32e0cSBarry Smith ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status3);CHKERRQ(ierr); 937d5b485abSSatish Balay 938d5b485abSSatish Balay /* Allocate buffers for a->a, and send them off */ 9397a868f3eSHong Zhang if (!ijonly) { 94082502324SSatish Balay ierr = PetscMalloc((nrqr+1)*sizeof(MatScalar*),&sbuf_aa);CHKERRQ(ierr); 941d5b485abSSatish Balay for (i=0,j=0; i<nrqr; i++) j += req_size[i]; 94282502324SSatish Balay ierr = PetscMalloc((j+1)*bs2*sizeof(MatScalar),&sbuf_aa[0]);CHKERRQ(ierr); 943a2feddc5SSatish Balay for (i=1; i<nrqr; i++) sbuf_aa[i] = sbuf_aa[i-1] + req_size[i-1]*bs2; 944d5b485abSSatish Balay 945b0a32e0cSBarry Smith ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits4);CHKERRQ(ierr); 946d5b485abSSatish Balay { 947d5b485abSSatish Balay for (i=0; i<nrqr; i++) { 948d5b485abSSatish Balay rbuf1_i = rbuf1[i]; 949d5b485abSSatish Balay sbuf_aa_i = sbuf_aa[i]; 950d5b485abSSatish Balay ct1 = 2*rbuf1_i[0]+1; 951d5b485abSSatish Balay ct2 = 0; 952d5b485abSSatish Balay for (j=1,max1=rbuf1_i[0]; j<=max1; j++) { 953d5b485abSSatish Balay kmax = rbuf1_i[2*j]; 954d5b485abSSatish Balay for (k=0; k<kmax; k++,ct1++) { 955e8e0db45SSatish Balay row = rbuf1_i[ct1] - rstart; 956d5b485abSSatish Balay nzA = a_i[row+1] - a_i[row]; nzB = b_i[row+1] - b_i[row]; 957d5b485abSSatish Balay ncols = nzA + nzB; 958d5b485abSSatish Balay cworkA = a_j + a_i[row]; cworkB = b_j + b_i[row]; 959a2feddc5SSatish Balay vworkA = a_a + a_i[row]*bs2; vworkB = b_a + b_i[row]*bs2; 960d5b485abSSatish Balay 961d5b485abSSatish Balay /* load the column values for this row into vals*/ 9625b83ace0SSatish Balay vals = sbuf_aa_i+ct2*bs2; 963d5b485abSSatish Balay for (l=0; l<nzB; l++) { 9643a40ed3dSBarry Smith if ((bmap[cworkB[l]]) < cstart) { 9653eda8832SBarry Smith ierr = PetscMemcpy(vals+l*bs2,vworkB+l*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 96626fbe8dcSKarl Rupp } else break; 967d5b485abSSatish Balay } 968d5b485abSSatish Balay imark = l; 9693a40ed3dSBarry Smith for (l=0; l<nzA; l++) { 9703eda8832SBarry Smith ierr = PetscMemcpy(vals+(imark+l)*bs2,vworkA+l*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 9713a40ed3dSBarry Smith } 9723a40ed3dSBarry Smith for (l=imark; l<nzB; l++) { 9733eda8832SBarry Smith ierr = PetscMemcpy(vals+(nzA+l)*bs2,vworkB+l*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 9743a40ed3dSBarry Smith } 975d5b485abSSatish Balay ct2 += ncols; 976d5b485abSSatish Balay } 977d5b485abSSatish Balay } 9783eda8832SBarry Smith ierr = MPI_Isend(sbuf_aa_i,req_size[i]*bs2,MPIU_MATSCALAR,req_source[i],tag3,comm,s_waits4+i);CHKERRQ(ierr); 979d5b485abSSatish Balay } 980d5b485abSSatish Balay } 981b0a32e0cSBarry Smith ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status4);CHKERRQ(ierr); 982b0a32e0cSBarry Smith ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status4);CHKERRQ(ierr); 9837a868f3eSHong Zhang } 984533163c2SBarry Smith ierr = PetscFree(rbuf1[0]);CHKERRQ(ierr); 985606d414cSSatish Balay ierr = PetscFree(rbuf1);CHKERRQ(ierr); 986d5b485abSSatish Balay 987d5b485abSSatish Balay /* Form the matrix */ 988307b7a18SHong Zhang /* create col map: global col of C -> local col of submatrices */ 989d5b485abSSatish Balay { 9905d0c19d7SBarry Smith const PetscInt *icol_i; 991233c2ff6SSatish Balay #if defined(PETSC_USE_CTABLE) 9928fa52d88SHong Zhang ierr = PetscMalloc((1+ismax)*sizeof(PetscTable),&cmap);CHKERRQ(ierr); 993ff0794f7SSatish Balay for (i=0; i<ismax; i++) { 994307b7a18SHong Zhang if (!allcolumns[i]) { 9958fa52d88SHong Zhang ierr = PetscTableCreate(ncol[i]+1,c->Nbs+1,&cmap[i]);CHKERRQ(ierr); 996307b7a18SHong Zhang jmax = ncol[i]; 997307b7a18SHong Zhang icol_i = icol[i]; 9988fa52d88SHong Zhang cmap_i = cmap[i]; 999307b7a18SHong Zhang for (j=0; j<jmax; j++) { 10003861aac3SJed Brown ierr = PetscTableAdd(cmap_i,icol_i[j]+1,j+1,INSERT_VALUES);CHKERRQ(ierr); 1001307b7a18SHong Zhang } 1002307b7a18SHong Zhang } else { 10030298fd71SBarry Smith cmap[i] = NULL; 1004307b7a18SHong Zhang } 1005233c2ff6SSatish Balay } 1006233c2ff6SSatish Balay #else 100723bdbc58SBarry Smith ierr = PetscMalloc(ismax*sizeof(PetscInt*),&cmap);CHKERRQ(ierr); 1008d5b485abSSatish Balay for (i=0; i<ismax; i++) { 10098fa52d88SHong Zhang if (!allcolumns[i]) { 10108fa52d88SHong Zhang ierr = PetscMalloc(c->Nbs*sizeof(PetscInt),&cmap[i]);CHKERRQ(ierr); 10118fa52d88SHong Zhang ierr = PetscMemzero(cmap[i],c->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 1012d5b485abSSatish Balay jmax = ncol[i]; 1013d5b485abSSatish Balay icol_i = icol[i]; 1014d5b485abSSatish Balay cmap_i = cmap[i]; 1015d5b485abSSatish Balay for (j=0; j<jmax; j++) { 1016d5b485abSSatish Balay cmap_i[icol_i[j]] = j+1; 1017d5b485abSSatish Balay } 10188fa52d88SHong Zhang } else { /* allcolumns[i] */ 10190298fd71SBarry Smith cmap[i] = NULL; 10208fa52d88SHong Zhang } 1021d5b485abSSatish Balay } 1022307b7a18SHong Zhang #endif 1023d5b485abSSatish Balay } 1024d5b485abSSatish Balay 1025d5b485abSSatish Balay /* Create lens which is required for MatCreate... */ 102626fbe8dcSKarl Rupp for (i=0,j=0; i<ismax; i++) j += nrow[i]; 1027052f0c41SBarry Smith ierr = PetscMalloc((1+ismax)*sizeof(PetscInt*)+ j*sizeof(PetscInt),&lens);CHKERRQ(ierr); 1028b24ad042SBarry Smith lens[0] = (PetscInt*)(lens + ismax); 1029b24ad042SBarry Smith ierr = PetscMemzero(lens[0],j*sizeof(PetscInt));CHKERRQ(ierr); 103026fbe8dcSKarl Rupp for (i=1; i<ismax; i++) lens[i] = lens[i-1] + nrow[i-1]; 1031d5b485abSSatish Balay 1032d5b485abSSatish Balay /* Update lens from local data */ 1033d5b485abSSatish Balay for (i=0; i<ismax; i++) { 1034d5b485abSSatish Balay jmax = nrow[i]; 10358fa52d88SHong Zhang if (!allcolumns[i]) cmap_i = cmap[i]; 1036d5b485abSSatish Balay irow_i = irow[i]; 1037d5b485abSSatish Balay lens_i = lens[i]; 1038d5b485abSSatish Balay for (j=0; j<jmax; j++) { 103926fbe8dcSKarl Rupp if (allrows[i]) row = j; 104026fbe8dcSKarl Rupp else row = irow_i[j]; 104126fbe8dcSKarl Rupp 1042233c2ff6SSatish Balay #if defined(PETSC_USE_CTABLE) 1043899cda47SBarry Smith ierr = PetscGetProc(row,size,c->rangebs,&proc);CHKERRQ(ierr); 1044233c2ff6SSatish Balay #else 1045d5b485abSSatish Balay proc = rtable[row]; 1046233c2ff6SSatish Balay #endif 1047d5b485abSSatish Balay if (proc == rank) { 104836f4e84dSSatish Balay /* Get indices from matA and then from matB */ 104936f4e84dSSatish Balay row = row - rstart; 105036f4e84dSSatish Balay nzA = a_i[row+1] - a_i[row]; nzB = b_i[row+1] - b_i[row]; 105136f4e84dSSatish Balay cworkA = a_j + a_i[row]; cworkB = b_j + b_i[row]; 1052307b7a18SHong Zhang if (!allcolumns[i]) { 1053233c2ff6SSatish Balay #if defined(PETSC_USE_CTABLE) 1054233c2ff6SSatish Balay for (k=0; k<nzA; k++) { 10558fa52d88SHong Zhang ierr = PetscTableFind(cmap_i,cstart+cworkA[k]+1,&tt);CHKERRQ(ierr); 105626fbe8dcSKarl Rupp if (tt) lens_i[j]++; 1057233c2ff6SSatish Balay } 1058233c2ff6SSatish Balay for (k=0; k<nzB; k++) { 10598fa52d88SHong Zhang ierr = PetscTableFind(cmap_i,bmap[cworkB[k]]+1,&tt);CHKERRQ(ierr); 106026fbe8dcSKarl Rupp if (tt) lens_i[j]++; 1061233c2ff6SSatish Balay } 1062307b7a18SHong Zhang 1063233c2ff6SSatish Balay #else 1064ca161407SBarry Smith for (k=0; k<nzA; k++) { 106526fbe8dcSKarl Rupp if (cmap_i[cstart + cworkA[k]]) lens_i[j]++; 1066ca161407SBarry Smith } 1067ca161407SBarry Smith for (k=0; k<nzB; k++) { 106826fbe8dcSKarl Rupp if (cmap_i[bmap[cworkB[k]]]) lens_i[j]++; 1069d5b485abSSatish Balay } 1070233c2ff6SSatish Balay #endif 1071307b7a18SHong Zhang } else { /* allcolumns */ 1072307b7a18SHong Zhang lens_i[j] = nzA + nzB; 1073307b7a18SHong Zhang } 1074d5b485abSSatish Balay } 1075a2feddc5SSatish Balay } 1076ca161407SBarry Smith } 1077233c2ff6SSatish Balay #if defined(PETSC_USE_CTABLE) 1078d5b485abSSatish Balay /* Create row map*/ 10798fa52d88SHong Zhang ierr = PetscMalloc((1+ismax)*sizeof(PetscTable),&rmap);CHKERRQ(ierr); 1080ff0794f7SSatish Balay for (i=0; i<ismax; i++) { 10818fa52d88SHong Zhang ierr = PetscTableCreate(nrow[i]+1,c->Mbs+1,&rmap[i]);CHKERRQ(ierr); 1082233c2ff6SSatish Balay } 1083233c2ff6SSatish Balay #else 1084233c2ff6SSatish Balay /* Create row map*/ 1085052f0c41SBarry Smith ierr = PetscMalloc((1+ismax)*sizeof(PetscInt*)+ ismax*Mbs*sizeof(PetscInt),&rmap);CHKERRQ(ierr); 1086b24ad042SBarry Smith rmap[0] = (PetscInt*)(rmap + ismax); 1087b24ad042SBarry Smith ierr = PetscMemzero(rmap[0],ismax*Mbs*sizeof(PetscInt));CHKERRQ(ierr); 108826fbe8dcSKarl Rupp for (i=1; i<ismax; i++) rmap[i] = rmap[i-1] + Mbs; 1089233c2ff6SSatish Balay #endif 1090d5b485abSSatish Balay for (i=0; i<ismax; i++) { 1091d5b485abSSatish Balay irow_i = irow[i]; 1092d5b485abSSatish Balay jmax = nrow[i]; 1093233c2ff6SSatish Balay #if defined(PETSC_USE_CTABLE) 10948fa52d88SHong Zhang rmap_i = rmap[i]; 1095233c2ff6SSatish Balay for (j=0; j<jmax; j++) { 10964da72fa9SHong Zhang if (allrows[i]) { 10973861aac3SJed Brown ierr = PetscTableAdd(rmap_i,j+1,j+1,INSERT_VALUES);CHKERRQ(ierr); 10984da72fa9SHong Zhang } else { 10993861aac3SJed Brown ierr = PetscTableAdd(rmap_i,irow_i[j]+1,j+1,INSERT_VALUES);CHKERRQ(ierr); 1100233c2ff6SSatish Balay } 11014da72fa9SHong Zhang } 1102233c2ff6SSatish Balay #else 1103233c2ff6SSatish Balay rmap_i = rmap[i]; 1104d5b485abSSatish Balay for (j=0; j<jmax; j++) { 110526fbe8dcSKarl Rupp if (allrows[i]) rmap_i[j] = j; 110626fbe8dcSKarl Rupp else rmap_i[irow_i[j]] = j; 11074da72fa9SHong Zhang } 1108233c2ff6SSatish Balay #endif 1109d5b485abSSatish Balay } 1110d5b485abSSatish Balay 1111d5b485abSSatish Balay /* Update lens from offproc data */ 1112d5b485abSSatish Balay { 1113b24ad042SBarry Smith PetscInt *rbuf2_i,*rbuf3_i,*sbuf1_i; 1114b24ad042SBarry Smith PetscMPIInt ii; 1115d5b485abSSatish Balay 1116d5b485abSSatish Balay for (tmp2=0; tmp2<nrqs; tmp2++) { 1117b24ad042SBarry Smith ierr = MPI_Waitany(nrqs,r_waits3,&ii,r_status3+tmp2);CHKERRQ(ierr); 1118b24ad042SBarry Smith idex = pa[ii]; 1119999d9058SBarry Smith sbuf1_i = sbuf1[idex]; 1120d5b485abSSatish Balay jmax = sbuf1_i[0]; 1121d5b485abSSatish Balay ct1 = 2*jmax+1; 1122d5b485abSSatish Balay ct2 = 0; 1123b24ad042SBarry Smith rbuf2_i = rbuf2[ii]; 1124b24ad042SBarry Smith rbuf3_i = rbuf3[ii]; 1125d5b485abSSatish Balay for (j=1; j<=jmax; j++) { 1126d5b485abSSatish Balay is_no = sbuf1_i[2*j-1]; 1127d5b485abSSatish Balay max1 = sbuf1_i[2*j]; 1128d5b485abSSatish Balay lens_i = lens[is_no]; 11298fa52d88SHong Zhang if (!allcolumns[is_no]) cmap_i = cmap[is_no]; 1130d5b485abSSatish Balay rmap_i = rmap[is_no]; 1131d5b485abSSatish Balay for (k=0; k<max1; k++,ct1++) { 1132233c2ff6SSatish Balay #if defined(PETSC_USE_CTABLE) 11338fa52d88SHong Zhang ierr = PetscTableFind(rmap_i,sbuf1_i[ct1]+1,&row);CHKERRQ(ierr); 1134233c2ff6SSatish Balay row--; 113526fbe8dcSKarl Rupp if (row < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"row not found in table"); 1136233c2ff6SSatish Balay #else 1137d5b485abSSatish Balay row = rmap_i[sbuf1_i[ct1]]; /* the val in the new matrix to be */ 1138233c2ff6SSatish Balay #endif 1139d5b485abSSatish Balay max2 = rbuf2_i[ct1]; 1140d5b485abSSatish Balay for (l=0; l<max2; l++,ct2++) { 1141307b7a18SHong Zhang if (!allcolumns[is_no]) { 1142233c2ff6SSatish Balay #if defined(PETSC_USE_CTABLE) 11438fa52d88SHong Zhang ierr = PetscTableFind(cmap_i,rbuf3_i[ct2]+1,&tt);CHKERRQ(ierr); 114426fbe8dcSKarl Rupp if (tt) lens_i[row]++; 1145233c2ff6SSatish Balay #else 114626fbe8dcSKarl Rupp if (cmap_i[rbuf3_i[ct2]]) lens_i[row]++; 1147233c2ff6SSatish Balay #endif 1148307b7a18SHong Zhang } else { /* allcolumns */ 1149307b7a18SHong Zhang lens_i[row]++; 1150307b7a18SHong Zhang } 1151d5b485abSSatish Balay } 1152d5b485abSSatish Balay } 1153d5b485abSSatish Balay } 1154d5b485abSSatish Balay } 1155d5b485abSSatish Balay } 1156606d414cSSatish Balay ierr = PetscFree(r_status3);CHKERRQ(ierr); 1157606d414cSSatish Balay ierr = PetscFree(r_waits3);CHKERRQ(ierr); 11580c468ba9SBarry Smith if (nrqr) {ierr = MPI_Waitall(nrqr,s_waits3,s_status3);CHKERRQ(ierr);} 1159606d414cSSatish Balay ierr = PetscFree(s_status3);CHKERRQ(ierr); 1160606d414cSSatish Balay ierr = PetscFree(s_waits3);CHKERRQ(ierr); 1161d5b485abSSatish Balay 1162d5b485abSSatish Balay /* Create the submatrices */ 1163d5b485abSSatish Balay if (scall == MAT_REUSE_MATRIX) { 11647a868f3eSHong Zhang if (ijonly) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP," MAT_REUSE_MATRIX and ijonly is not supported yet"); 1165d5b485abSSatish Balay /* 1166d5b485abSSatish Balay Assumes new rows are same length as the old rows, hence bug! 1167d5b485abSSatish Balay */ 1168d5b485abSSatish Balay for (i=0; i<ismax; i++) { 1169df36731bSBarry Smith mat = (Mat_SeqBAIJ*)(submats[i]->data); 1170e7e72b3dSBarry Smith if ((mat->mbs != nrow[i]) || (mat->nbs != ncol[i] || C->rmap->bs != bs)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size"); 1171b24ad042SBarry Smith ierr = PetscMemcmp(mat->ilen,lens[i],mat->mbs *sizeof(PetscInt),&flag);CHKERRQ(ierr); 1172e7e72b3dSBarry Smith if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Cannot reuse matrix. wrong no of nonzeros"); 1173d5b485abSSatish Balay /* Initial matrix as if empty */ 1174b24ad042SBarry Smith ierr = PetscMemzero(mat->ilen,mat->mbs*sizeof(PetscInt));CHKERRQ(ierr); 117526fbe8dcSKarl Rupp 1176d5f3da31SBarry Smith submats[i]->factortype = C->factortype; 1177d5b485abSSatish Balay } 1178ca161407SBarry Smith } else { 11797a868f3eSHong Zhang PetscInt bs_tmp; 118026fbe8dcSKarl Rupp if (ijonly) bs_tmp = 1; 118126fbe8dcSKarl Rupp else bs_tmp = bs; 1182d5b485abSSatish Balay for (i=0; i<ismax; i++) { 1183f69a0ea3SMatthew Knepley ierr = MatCreate(PETSC_COMM_SELF,submats+i);CHKERRQ(ierr); 11847a868f3eSHong Zhang ierr = MatSetSizes(submats[i],nrow[i]*bs_tmp,ncol[i]*bs_tmp,nrow[i]*bs_tmp,ncol[i]*bs_tmp);CHKERRQ(ierr); 11857adad957SLisandro Dalcin ierr = MatSetType(submats[i],((PetscObject)A)->type_name);CHKERRQ(ierr); 11867a868f3eSHong Zhang ierr = MatSeqBAIJSetPreallocation(submats[i],bs_tmp,0,lens[i]);CHKERRQ(ierr); 11877a868f3eSHong Zhang ierr = MatSeqSBAIJSetPreallocation(submats[i],bs_tmp,0,lens[i]);CHKERRQ(ierr); /* this subroutine is used by SBAIJ routines */ 1188d5b485abSSatish Balay } 1189d5b485abSSatish Balay } 1190d5b485abSSatish Balay 1191d5b485abSSatish Balay /* Assemble the matrices */ 1192d5b485abSSatish Balay /* First assemble the local rows */ 1193d5b485abSSatish Balay { 1194b24ad042SBarry Smith PetscInt ilen_row,*imat_ilen,*imat_j,*imat_i; 11950298fd71SBarry Smith MatScalar *imat_a = NULL; 1196d5b485abSSatish Balay 1197d5b485abSSatish Balay for (i=0; i<ismax; i++) { 1198df36731bSBarry Smith mat = (Mat_SeqBAIJ*)submats[i]->data; 1199d5b485abSSatish Balay imat_ilen = mat->ilen; 1200d5b485abSSatish Balay imat_j = mat->j; 1201d5b485abSSatish Balay imat_i = mat->i; 12027a868f3eSHong Zhang if (!ijonly) imat_a = mat->a; 12038fa52d88SHong Zhang if (!allcolumns[i]) cmap_i = cmap[i]; 1204d5b485abSSatish Balay rmap_i = rmap[i]; 1205d5b485abSSatish Balay irow_i = irow[i]; 1206d5b485abSSatish Balay jmax = nrow[i]; 1207d5b485abSSatish Balay for (j=0; j<jmax; j++) { 120826fbe8dcSKarl Rupp if (allrows[i]) row = j; 120926fbe8dcSKarl Rupp else row = irow_i[j]; 121026fbe8dcSKarl Rupp 1211233c2ff6SSatish Balay #if defined(PETSC_USE_CTABLE) 1212899cda47SBarry Smith ierr = PetscGetProc(row,size,c->rangebs,&proc);CHKERRQ(ierr); 1213233c2ff6SSatish Balay #else 1214d5b485abSSatish Balay proc = rtable[row]; 1215233c2ff6SSatish Balay #endif 1216d5b485abSSatish Balay if (proc == rank) { 121736f4e84dSSatish Balay row = row - rstart; 121836f4e84dSSatish Balay nzA = a_i[row+1] - a_i[row]; 121936f4e84dSSatish Balay nzB = b_i[row+1] - b_i[row]; 122036f4e84dSSatish Balay cworkA = a_j + a_i[row]; 122136f4e84dSSatish Balay cworkB = b_j + b_i[row]; 12227a868f3eSHong Zhang if (!ijonly) { 122336f4e84dSSatish Balay vworkA = a_a + a_i[row]*bs2; 122436f4e84dSSatish Balay vworkB = b_a + b_i[row]*bs2; 12257a868f3eSHong Zhang } 1226233c2ff6SSatish Balay #if defined(PETSC_USE_CTABLE) 12278fa52d88SHong Zhang ierr = PetscTableFind(rmap_i,row+rstart+1,&row);CHKERRQ(ierr); 1228233c2ff6SSatish Balay row--; 122926fbe8dcSKarl Rupp if (row < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"row not found in table"); 1230233c2ff6SSatish Balay #else 123136f4e84dSSatish Balay row = rmap_i[row + rstart]; 1232233c2ff6SSatish Balay #endif 1233df36731bSBarry Smith mat_i = imat_i[row]; 12347a868f3eSHong Zhang if (!ijonly) mat_a = imat_a + mat_i*bs2; 1235d5b485abSSatish Balay mat_j = imat_j + mat_i; 123636f4e84dSSatish Balay ilen_row = imat_ilen[row]; 123736f4e84dSSatish Balay 123836f4e84dSSatish Balay /* load the column indices for this row into cols*/ 1239307b7a18SHong Zhang if (!allcolumns[i]) { 124036f4e84dSSatish Balay for (l=0; l<nzB; l++) { 124136f4e84dSSatish Balay if ((ctmp = bmap[cworkB[l]]) < cstart) { 1242233c2ff6SSatish Balay #if defined(PETSC_USE_CTABLE) 12438fa52d88SHong Zhang ierr = PetscTableFind(cmap_i,ctmp+1,&tcol);CHKERRQ(ierr); 1244233c2ff6SSatish Balay if (tcol) { 1245233c2ff6SSatish Balay #else 124636f4e84dSSatish Balay if ((tcol = cmap_i[ctmp])) { 1247233c2ff6SSatish Balay #endif 1248df36731bSBarry Smith *mat_j++ = tcol - 1; 12493eda8832SBarry Smith ierr = PetscMemcpy(mat_a,vworkB+l*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1250549d3d68SSatish Balay mat_a += bs2; 1251d5b485abSSatish Balay ilen_row++; 1252d5b485abSSatish Balay } 1253ca161407SBarry Smith } else break; 125436f4e84dSSatish Balay } 125536f4e84dSSatish Balay imark = l; 125636f4e84dSSatish Balay for (l=0; l<nzA; l++) { 1257233c2ff6SSatish Balay #if defined(PETSC_USE_CTABLE) 12588fa52d88SHong Zhang ierr = PetscTableFind(cmap_i,cstart+cworkA[l]+1,&tcol);CHKERRQ(ierr); 1259233c2ff6SSatish Balay if (tcol) { 1260233c2ff6SSatish Balay #else 126136f4e84dSSatish Balay if ((tcol = cmap_i[cstart + cworkA[l]])) { 1262233c2ff6SSatish Balay #endif 126336f4e84dSSatish Balay *mat_j++ = tcol - 1; 12647a868f3eSHong Zhang if (!ijonly) { 12653eda8832SBarry Smith ierr = PetscMemcpy(mat_a,vworkA+l*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1266549d3d68SSatish Balay mat_a += bs2; 12677a868f3eSHong Zhang } 126836f4e84dSSatish Balay ilen_row++; 126936f4e84dSSatish Balay } 127036f4e84dSSatish Balay } 127136f4e84dSSatish Balay for (l=imark; l<nzB; l++) { 1272233c2ff6SSatish Balay #if defined(PETSC_USE_CTABLE) 12738fa52d88SHong Zhang ierr = PetscTableFind(cmap_i,bmap[cworkB[l]]+1,&tcol);CHKERRQ(ierr); 1274233c2ff6SSatish Balay if (tcol) { 1275233c2ff6SSatish Balay #else 127636f4e84dSSatish Balay if ((tcol = cmap_i[bmap[cworkB[l]]])) { 1277233c2ff6SSatish Balay #endif 127836f4e84dSSatish Balay *mat_j++ = tcol - 1; 12797a868f3eSHong Zhang if (!ijonly) { 12803eda8832SBarry Smith ierr = PetscMemcpy(mat_a,vworkB+l*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1281549d3d68SSatish Balay mat_a += bs2; 12827a868f3eSHong Zhang } 128336f4e84dSSatish Balay ilen_row++; 128436f4e84dSSatish Balay } 128536f4e84dSSatish Balay } 1286307b7a18SHong Zhang } else { /* allcolumns */ 1287307b7a18SHong Zhang for (l=0; l<nzB; l++) { 1288307b7a18SHong Zhang if ((ctmp = bmap[cworkB[l]]) < cstart) { 1289307b7a18SHong Zhang *mat_j++ = ctmp; 1290307b7a18SHong Zhang ierr = PetscMemcpy(mat_a,vworkB+l*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1291307b7a18SHong Zhang mat_a += bs2; 1292307b7a18SHong Zhang ilen_row++; 1293307b7a18SHong Zhang } else break; 1294307b7a18SHong Zhang } 1295307b7a18SHong Zhang imark = l; 1296307b7a18SHong Zhang for (l=0; l<nzA; l++) { 1297307b7a18SHong Zhang *mat_j++ = cstart+cworkA[l]; 1298307b7a18SHong Zhang if (!ijonly) { 1299307b7a18SHong Zhang ierr = PetscMemcpy(mat_a,vworkA+l*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1300307b7a18SHong Zhang mat_a += bs2; 1301307b7a18SHong Zhang } 1302307b7a18SHong Zhang ilen_row++; 1303307b7a18SHong Zhang } 1304307b7a18SHong Zhang for (l=imark; l<nzB; l++) { 1305307b7a18SHong Zhang *mat_j++ = bmap[cworkB[l]]; 1306307b7a18SHong Zhang if (!ijonly) { 1307307b7a18SHong Zhang ierr = PetscMemcpy(mat_a,vworkB+l*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1308307b7a18SHong Zhang mat_a += bs2; 1309307b7a18SHong Zhang } 1310307b7a18SHong Zhang ilen_row++; 1311307b7a18SHong Zhang } 1312307b7a18SHong Zhang } 1313d5b485abSSatish Balay imat_ilen[row] = ilen_row; 1314d5b485abSSatish Balay } 1315d5b485abSSatish Balay } 1316d5b485abSSatish Balay } 1317d5b485abSSatish Balay } 1318d5b485abSSatish Balay 1319d5b485abSSatish Balay /* Now assemble the off proc rows*/ 1320d5b485abSSatish Balay { 1321b24ad042SBarry Smith PetscInt *sbuf1_i,*rbuf2_i,*rbuf3_i,*imat_ilen,ilen; 1322b24ad042SBarry Smith PetscInt *imat_j,*imat_i; 13230298fd71SBarry Smith MatScalar *imat_a = NULL,*rbuf4_i = NULL; 1324b24ad042SBarry Smith PetscMPIInt ii; 1325d5b485abSSatish Balay 1326d5b485abSSatish Balay for (tmp2=0; tmp2<nrqs; tmp2++) { 132726fbe8dcSKarl Rupp if (ijonly) ii = tmp2; 132826fbe8dcSKarl Rupp else { 1329b24ad042SBarry Smith ierr = MPI_Waitany(nrqs,r_waits4,&ii,r_status4+tmp2);CHKERRQ(ierr); 13307a868f3eSHong Zhang } 1331b24ad042SBarry Smith idex = pa[ii]; 1332999d9058SBarry Smith sbuf1_i = sbuf1[idex]; 1333d5b485abSSatish Balay jmax = sbuf1_i[0]; 1334d5b485abSSatish Balay ct1 = 2*jmax + 1; 1335d5b485abSSatish Balay ct2 = 0; 1336b24ad042SBarry Smith rbuf2_i = rbuf2[ii]; 1337b24ad042SBarry Smith rbuf3_i = rbuf3[ii]; 13387a868f3eSHong Zhang if (!ijonly) rbuf4_i = rbuf4[ii]; 1339d5b485abSSatish Balay for (j=1; j<=jmax; j++) { 1340d5b485abSSatish Balay is_no = sbuf1_i[2*j-1]; 13418fa52d88SHong Zhang if (!allcolumns[is_no]) cmap_i = cmap[is_no]; 1342d5b485abSSatish Balay rmap_i = rmap[is_no]; 1343df36731bSBarry Smith mat = (Mat_SeqBAIJ*)submats[is_no]->data; 1344d5b485abSSatish Balay imat_ilen = mat->ilen; 1345d5b485abSSatish Balay imat_j = mat->j; 1346d5b485abSSatish Balay imat_i = mat->i; 13477a868f3eSHong Zhang if (!ijonly) imat_a = mat->a; 1348d5b485abSSatish Balay max1 = sbuf1_i[2*j]; 1349d5b485abSSatish Balay for (k=0; k<max1; k++,ct1++) { 1350d5b485abSSatish Balay row = sbuf1_i[ct1]; 1351233c2ff6SSatish Balay #if defined(PETSC_USE_CTABLE) 13528fa52d88SHong Zhang ierr = PetscTableFind(rmap_i,row+1,&row);CHKERRQ(ierr); 1353233c2ff6SSatish Balay row--; 135426fbe8dcSKarl Rupp if (row < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"row not found in table"); 1355233c2ff6SSatish Balay #else 1356d5b485abSSatish Balay row = rmap_i[row]; 1357233c2ff6SSatish Balay #endif 1358d5b485abSSatish Balay ilen = imat_ilen[row]; 1359df36731bSBarry Smith mat_i = imat_i[row]; 13607a868f3eSHong Zhang if (!ijonly) mat_a = imat_a + mat_i*bs2; 1361d5b485abSSatish Balay mat_j = imat_j + mat_i; 1362d5b485abSSatish Balay max2 = rbuf2_i[ct1]; 1363307b7a18SHong Zhang 1364307b7a18SHong Zhang if (!allcolumns[is_no]) { 1365d5b485abSSatish Balay for (l=0; l<max2; l++,ct2++) { 1366233c2ff6SSatish Balay #if defined(PETSC_USE_CTABLE) 13678fa52d88SHong Zhang ierr = PetscTableFind(cmap_i,rbuf3_i[ct2]+1,&tcol);CHKERRQ(ierr); 1368233c2ff6SSatish Balay if (tcol) { 1369233c2ff6SSatish Balay #else 1370d5b485abSSatish Balay if ((tcol = cmap_i[rbuf3_i[ct2]])) { 1371233c2ff6SSatish Balay #endif 1372df36731bSBarry Smith *mat_j++ = tcol - 1; 13737a868f3eSHong Zhang if (!ijonly) { 13743eda8832SBarry Smith ierr = PetscMemcpy(mat_a,rbuf4_i+ct2*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1375549d3d68SSatish Balay mat_a += bs2; 13767a868f3eSHong Zhang } 1377d5b485abSSatish Balay ilen++; 1378d5b485abSSatish Balay } 1379d5b485abSSatish Balay } 1380307b7a18SHong Zhang } else { /* allcolumns */ 1381307b7a18SHong Zhang for (l=0; l<max2; l++,ct2++) { 1382307b7a18SHong Zhang *mat_j++ = rbuf3_i[ct2]; 1383307b7a18SHong Zhang if (!ijonly) { 1384307b7a18SHong Zhang ierr = PetscMemcpy(mat_a,rbuf4_i+ct2*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1385307b7a18SHong Zhang mat_a += bs2; 1386307b7a18SHong Zhang } 1387307b7a18SHong Zhang ilen++; 1388307b7a18SHong Zhang } 1389307b7a18SHong Zhang } 1390d5b485abSSatish Balay imat_ilen[row] = ilen; 1391d5b485abSSatish Balay } 1392d5b485abSSatish Balay } 1393d5b485abSSatish Balay } 1394d5b485abSSatish Balay } 13957a868f3eSHong Zhang if (!ijonly) { 1396606d414cSSatish Balay ierr = PetscFree(r_status4);CHKERRQ(ierr); 1397606d414cSSatish Balay ierr = PetscFree(r_waits4);CHKERRQ(ierr); 13980c468ba9SBarry Smith if (nrqr) {ierr = MPI_Waitall(nrqr,s_waits4,s_status4);CHKERRQ(ierr);} 1399606d414cSSatish Balay ierr = PetscFree(s_waits4);CHKERRQ(ierr); 1400606d414cSSatish Balay ierr = PetscFree(s_status4);CHKERRQ(ierr); 14017a868f3eSHong Zhang } 1402d5b485abSSatish Balay 1403d5b485abSSatish Balay /* Restore the indices */ 1404d5b485abSSatish Balay for (i=0; i<ismax; i++) { 14054da72fa9SHong Zhang if (!allrows[i]) { 1406d5b485abSSatish Balay ierr = ISRestoreIndices(isrow[i],irow+i);CHKERRQ(ierr); 14074da72fa9SHong Zhang } 1408307b7a18SHong Zhang if (!allcolumns[i]) { 1409d5b485abSSatish Balay ierr = ISRestoreIndices(iscol[i],icol+i);CHKERRQ(ierr); 1410d5b485abSSatish Balay } 1411307b7a18SHong Zhang } 1412d5b485abSSatish Balay 1413d5b485abSSatish Balay /* Destroy allocated memory */ 141423bdbc58SBarry Smith #if defined(PETSC_USE_CTABLE) 141523bdbc58SBarry Smith ierr = PetscFree4(irow,icol,nrow,ncol);CHKERRQ(ierr); 141623bdbc58SBarry Smith #else 141723bdbc58SBarry Smith ierr = PetscFree5(irow,icol,nrow,ncol,rtable);CHKERRQ(ierr); 141823bdbc58SBarry Smith #endif 141923bdbc58SBarry Smith ierr = PetscFree4(w1,w2,w3,w4);CHKERRQ(ierr); 1420606d414cSSatish Balay ierr = PetscFree(pa);CHKERRQ(ierr); 1421d5b485abSSatish Balay 142223bdbc58SBarry Smith ierr = PetscFree4(sbuf1,ptr,tmp,ctr);CHKERRQ(ierr); 1423606d414cSSatish Balay ierr = PetscFree(sbuf1);CHKERRQ(ierr); 1424606d414cSSatish Balay ierr = PetscFree(rbuf2);CHKERRQ(ierr); 1425d5b485abSSatish Balay for (i=0; i<nrqr; ++i) { 1426606d414cSSatish Balay ierr = PetscFree(sbuf2[i]);CHKERRQ(ierr); 1427d5b485abSSatish Balay } 1428d5b485abSSatish Balay for (i=0; i<nrqs; ++i) { 1429606d414cSSatish Balay ierr = PetscFree(rbuf3[i]);CHKERRQ(ierr); 1430d5b485abSSatish Balay } 143123bdbc58SBarry Smith ierr = PetscFree3(sbuf2,req_size,req_source);CHKERRQ(ierr); 1432606d414cSSatish Balay ierr = PetscFree(rbuf3);CHKERRQ(ierr); 1433606d414cSSatish Balay ierr = PetscFree(sbuf_aj[0]);CHKERRQ(ierr); 1434606d414cSSatish Balay ierr = PetscFree(sbuf_aj);CHKERRQ(ierr); 14357a868f3eSHong Zhang if (!ijonly) { 14367a868f3eSHong Zhang for (i=0; i<nrqs; ++i) {ierr = PetscFree(rbuf4[i]);CHKERRQ(ierr);} 14377a868f3eSHong Zhang ierr = PetscFree(rbuf4);CHKERRQ(ierr); 1438606d414cSSatish Balay ierr = PetscFree(sbuf_aa[0]);CHKERRQ(ierr); 1439606d414cSSatish Balay ierr = PetscFree(sbuf_aa);CHKERRQ(ierr); 14407a868f3eSHong Zhang } 1441d5b485abSSatish Balay 1442233c2ff6SSatish Balay #if defined(PETSC_USE_CTABLE) 1443ff0794f7SSatish Balay for (i=0; i<ismax; i++) { 14448fa52d88SHong Zhang ierr = PetscTableDestroy((PetscTable*)&rmap[i]);CHKERRQ(ierr); 1445233c2ff6SSatish Balay } 1446233c2ff6SSatish Balay #endif 14478fa52d88SHong Zhang ierr = PetscFree(rmap);CHKERRQ(ierr); 14488fa52d88SHong Zhang 14498fa52d88SHong Zhang for (i=0; i<ismax; i++) { 14508fa52d88SHong Zhang if (!allcolumns[i]) { 14518fa52d88SHong Zhang #if defined(PETSC_USE_CTABLE) 14528fa52d88SHong Zhang ierr = PetscTableDestroy((PetscTable*)&cmap[i]);CHKERRQ(ierr); 14538fa52d88SHong Zhang #else 14548fa52d88SHong Zhang ierr = PetscFree(cmap[i]);CHKERRQ(ierr); 14558fa52d88SHong Zhang #endif 14568fa52d88SHong Zhang } 14578fa52d88SHong Zhang } 14588fa52d88SHong Zhang ierr = PetscFree(cmap);CHKERRQ(ierr); 1459606d414cSSatish Balay ierr = PetscFree(lens);CHKERRQ(ierr); 1460d5b485abSSatish Balay 1461d5b485abSSatish Balay for (i=0; i<ismax; i++) { 146236f4e84dSSatish Balay ierr = MatAssemblyBegin(submats[i],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 146336f4e84dSSatish Balay ierr = MatAssemblyEnd(submats[i],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1464d5b485abSSatish Balay } 14657a868f3eSHong Zhang 14667a868f3eSHong Zhang c->ijonly = PETSC_FALSE; /* set back to the default */ 14673a40ed3dSBarry Smith PetscFunctionReturn(0); 1468d5b485abSSatish Balay } 1469ca161407SBarry Smith 1470