173f4d377SMatthew Knepley /*$Id: mmaij.c,v 1.59 2001/08/07 03:02:49 balay Exp $*/ 28c79f6d3SBarry Smith 38c79f6d3SBarry Smith /* 48c79f6d3SBarry Smith Support for the parallel AIJ matrix vector multiply 58c79f6d3SBarry Smith */ 670f55243SBarry Smith #include "src/mat/impls/aij/mpi/mpiaij.h" 7f5eb4b81SSatish Balay #include "src/vec/vecimpl.h" 88c79f6d3SBarry Smith 94a2ae208SSatish Balay #undef __FUNCT__ 104a2ae208SSatish Balay #define __FUNCT__ "MatSetUpMultiply_MPIAIJ" 1144a69424SLois Curfman McInnes int MatSetUpMultiply_MPIAIJ(Mat mat) 128c79f6d3SBarry Smith { 1344a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 14ec8511deSBarry Smith Mat_SeqAIJ *B = (Mat_SeqAIJ*)(aij->B->data); 15273d9f13SBarry Smith int N = mat->N,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray; 16416022c9SBarry Smith int shift = B->indexshift; 171eb62cbbSBarry Smith IS from,to; 181eb62cbbSBarry Smith Vec gvec; 19aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 200f5bd95cSBarry Smith PetscTable gid1_lid1; 210f5bd95cSBarry Smith PetscTablePosition tpos; 222066d6f7SSatish Balay int gid,lid; 232066d6f7SSatish Balay #endif 242066d6f7SSatish Balay 253a40ed3dSBarry Smith PetscFunctionBegin; 262066d6f7SSatish Balay 27aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 282066d6f7SSatish Balay /* use a table - Mark Adams (this has not been tested with "shift") */ 29273d9f13SBarry Smith ierr = PetscTableCreate(aij->B->m,&gid1_lid1);CHKERRQ(ierr); 30273d9f13SBarry Smith for (i=0; i<aij->B->m; i++) { 312066d6f7SSatish Balay for (j=0; j<B->ilen[i]; j++) { 3208c73f0fSSatish Balay int data,gid1 = aj[B->i[i] + shift + j] + 1 + shift; 330f5bd95cSBarry Smith ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 34fa46199cSSatish Balay if (!data) { 352066d6f7SSatish Balay /* one based table */ 360f5bd95cSBarry Smith ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr); 372066d6f7SSatish Balay } 382066d6f7SSatish Balay } 392066d6f7SSatish Balay } 402066d6f7SSatish Balay /* form array of columns we need */ 41b0a32e0cSBarry Smith ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr); 420f5bd95cSBarry Smith ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 432066d6f7SSatish Balay while (tpos) { 440f5bd95cSBarry Smith ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 45b0a32e0cSBarry Smith gid--; 46b0a32e0cSBarry Smith lid--; 472066d6f7SSatish Balay garray[lid] = gid; 482066d6f7SSatish Balay } 490064e2bbSSatish Balay ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); /* sort, and rebuild */ 500f5bd95cSBarry Smith ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 512066d6f7SSatish Balay for (i=0; i<ec; i++) { 520f5bd95cSBarry Smith ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr); 532066d6f7SSatish Balay } 542066d6f7SSatish Balay /* compact out the extra columns in B */ 55273d9f13SBarry Smith for (i=0; i<aij->B->m; i++) { 562066d6f7SSatish Balay for (j=0; j<B->ilen[i]; j++) { 5708c73f0fSSatish Balay int gid1 = aj[B->i[i] + shift + j] + 1 + shift; 580f5bd95cSBarry Smith ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 59fa46199cSSatish Balay lid --; 6008c73f0fSSatish Balay aj[B->i[i] + shift + j] = lid - shift; 612066d6f7SSatish Balay } 622066d6f7SSatish Balay } 63273d9f13SBarry Smith aij->B->n = aij->B->N = ec; 640f5bd95cSBarry Smith ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr); 652066d6f7SSatish Balay /* Mark Adams */ 662066d6f7SSatish Balay #else 678c79f6d3SBarry Smith /* For the first stab we make an array as long as the number of columns */ 681eb62cbbSBarry Smith /* mark those columns that are in aij->B */ 69b0a32e0cSBarry Smith ierr = PetscMalloc((N+1)*sizeof(int),&indices);CHKERRQ(ierr); 70549d3d68SSatish Balay ierr = PetscMemzero(indices,N*sizeof(int));CHKERRQ(ierr); 71273d9f13SBarry Smith for (i=0; i<aij->B->m; i++) { 72d6dfbf8fSBarry Smith for (j=0; j<B->ilen[i]; j++) { 73dbb450caSBarry Smith if (!indices[aj[B->i[i] +shift + j] + shift]) ec++; 74416022c9SBarry Smith indices[aj[B->i[i] + shift + j] + shift] = 1; 75416022c9SBarry Smith } 761eb62cbbSBarry Smith } 778c79f6d3SBarry Smith 781eb62cbbSBarry Smith /* form array of columns we need */ 79b0a32e0cSBarry Smith ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr); 801eb62cbbSBarry Smith ec = 0; 811eb62cbbSBarry Smith for (i=0; i<N; i++) { 821eb62cbbSBarry Smith if (indices[i]) garray[ec++] = i; 831eb62cbbSBarry Smith } 841eb62cbbSBarry Smith 851eb62cbbSBarry Smith /* make indices now point into garray */ 861eb62cbbSBarry Smith for (i=0; i<ec; i++) { 87dbb450caSBarry Smith indices[garray[i]] = i-shift; 881eb62cbbSBarry Smith } 891eb62cbbSBarry Smith 901eb62cbbSBarry Smith /* compact out the extra columns in B */ 91273d9f13SBarry Smith for (i=0; i<aij->B->m; i++) { 92d6dfbf8fSBarry Smith for (j=0; j<B->ilen[i]; j++) { 93dbb450caSBarry Smith aj[B->i[i] + shift + j] = indices[aj[B->i[i] + shift + j]+shift]; 941eb62cbbSBarry Smith } 95d6dfbf8fSBarry Smith } 96273d9f13SBarry Smith aij->B->n = aij->B->N = ec; 97606d414cSSatish Balay ierr = PetscFree(indices);CHKERRQ(ierr); 982066d6f7SSatish Balay #endif 991eb62cbbSBarry Smith /* create local vector that is used to scatter into */ 100029af93fSBarry Smith ierr = VecCreateSeq(PETSC_COMM_SELF,ec,&aij->lvec);CHKERRQ(ierr); 1011eb62cbbSBarry Smith 102d6dfbf8fSBarry Smith /* create two temporary Index sets for build scatter gather */ 103b9b97703SBarry Smith ierr = ISCreateGeneral(mat->comm,ec,garray,&from);CHKERRQ(ierr); 104029af93fSBarry Smith ierr = ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to);CHKERRQ(ierr); 1051eb62cbbSBarry Smith 1061eb62cbbSBarry Smith /* create temporary global vector to generate scatter context */ 1071eb62cbbSBarry Smith /* this is inefficient, but otherwise we must do either 1081eb62cbbSBarry Smith 1) save garray until the first actual scatter when the vector is known or 1091eb62cbbSBarry Smith 2) have another way of generating a scatter context without a vector.*/ 110273d9f13SBarry Smith ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr); 1111eb62cbbSBarry Smith 1122d336d48SLois Curfman McInnes /* generate the scatter context */ 11308480c60SBarry Smith ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx);CHKERRQ(ierr); 114b0a32e0cSBarry Smith PetscLogObjectParent(mat,aij->Mvctx); 115b0a32e0cSBarry Smith PetscLogObjectParent(mat,aij->lvec); 116b0a32e0cSBarry Smith PetscLogObjectParent(mat,from); 117b0a32e0cSBarry Smith PetscLogObjectParent(mat,to); 1189e25ed09SBarry Smith aij->garray = garray; 119b0a32e0cSBarry Smith PetscLogObjectMemory(mat,(ec+1)*sizeof(int)); 12078b31e54SBarry Smith ierr = ISDestroy(from);CHKERRQ(ierr); 12178b31e54SBarry Smith ierr = ISDestroy(to);CHKERRQ(ierr); 122888f2ed8SSatish Balay ierr = VecDestroy(gvec);CHKERRQ(ierr); 1233a40ed3dSBarry Smith PetscFunctionReturn(0); 1248c79f6d3SBarry Smith } 1259e25ed09SBarry Smith 1269e25ed09SBarry Smith 1274a2ae208SSatish Balay #undef __FUNCT__ 1284a2ae208SSatish Balay #define __FUNCT__ "DisAssemble_MPIAIJ" 1292493cbb0SBarry Smith /* 1302493cbb0SBarry Smith Takes the local part of an already assembled MPIAIJ matrix 1312493cbb0SBarry Smith and disassembles it. This is to allow new nonzeros into the matrix 1322493cbb0SBarry Smith that require more communication in the matrix vector multiply. 1332493cbb0SBarry Smith Thus certain data-structures must be rebuilt. 1342493cbb0SBarry Smith 1352493cbb0SBarry Smith Kind of slow! But that's what application programmers get when 1362493cbb0SBarry Smith they are sloppy. 1372493cbb0SBarry Smith */ 1382493cbb0SBarry Smith int DisAssemble_MPIAIJ(Mat A) 1392493cbb0SBarry Smith { 1402493cbb0SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 1412493cbb0SBarry Smith Mat B = aij->B,Bnew; 142ec8511deSBarry Smith Mat_SeqAIJ *Baij = (Mat_SeqAIJ*)B->data; 143273d9f13SBarry Smith int ierr,i,j,m = B->m,n = A->N,col,ct = 0,*garray = aij->garray; 144416022c9SBarry Smith int *nz,ec,shift = Baij->indexshift; 14587828ca2SBarry Smith PetscScalar v; 1462493cbb0SBarry Smith 1473a40ed3dSBarry Smith PetscFunctionBegin; 1482493cbb0SBarry Smith /* free stuff related to matrix-vec multiply */ 149b0a32e0cSBarry Smith ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 1502493cbb0SBarry Smith ierr = VecDestroy(aij->lvec);CHKERRQ(ierr); aij->lvec = 0; 15108480c60SBarry Smith ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr); aij->Mvctx = 0; 152464493b3SBarry Smith if (aij->colmap) { 153aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 1540f5bd95cSBarry Smith ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr); 1550f5bd95cSBarry Smith aij->colmap = 0; 1562066d6f7SSatish Balay #else 157606d414cSSatish Balay ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 158606d414cSSatish Balay aij->colmap = 0; 159b0a32e0cSBarry Smith PetscLogObjectMemory(A,-aij->B->n*sizeof(int)); 1602066d6f7SSatish Balay #endif 161464493b3SBarry Smith } 1622493cbb0SBarry Smith 1632493cbb0SBarry Smith /* make sure that B is assembled so we can access its values */ 1646d4a8577SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 165fe2f2677SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1662493cbb0SBarry Smith 1672493cbb0SBarry Smith /* invent new B and copy stuff over */ 168b0a32e0cSBarry Smith ierr = PetscMalloc((m+1)*sizeof(int),&nz);CHKERRQ(ierr); 16948b35521SBarry Smith for (i=0; i<m; i++) { 17048b35521SBarry Smith nz[i] = Baij->i[i+1] - Baij->i[i]; 17148b35521SBarry Smith } 172029af93fSBarry Smith ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,0,nz,&Bnew);CHKERRQ(ierr); 173606d414cSSatish Balay ierr = PetscFree(nz);CHKERRQ(ierr); 1742493cbb0SBarry Smith for (i=0; i<m; i++) { 175dbb450caSBarry Smith for (j=Baij->i[i]+shift; j<Baij->i[i+1]+shift; j++) { 176dbb450caSBarry Smith col = garray[Baij->j[ct]+shift]; 1772493cbb0SBarry Smith v = Baij->a[ct++]; 17883271157SBarry Smith ierr = MatSetValues(Bnew,1,&i,1,&col,&v,B->insertmode);CHKERRQ(ierr); 1792493cbb0SBarry Smith } 1802493cbb0SBarry Smith } 181606d414cSSatish Balay ierr = PetscFree(aij->garray);CHKERRQ(ierr); 182606d414cSSatish Balay aij->garray = 0; 183b0a32e0cSBarry Smith PetscLogObjectMemory(A,-ec*sizeof(int)); 1842493cbb0SBarry Smith ierr = MatDestroy(B);CHKERRQ(ierr); 185b0a32e0cSBarry Smith PetscLogObjectParent(A,Bnew); 1862493cbb0SBarry Smith aij->B = Bnew; 187227d817aSBarry Smith A->was_assembled = PETSC_FALSE; 1883a40ed3dSBarry Smith PetscFunctionReturn(0); 1892493cbb0SBarry Smith } 1902493cbb0SBarry Smith 191*2cd6534aSBarry Smith /* ugly stuff added for Glenn someday we should fix this up */ 192*2cd6534aSBarry Smith 193*2cd6534aSBarry Smith static int *auglyrmapd = 0,*auglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal" 194*2cd6534aSBarry Smith parts of the local matrix */ 195*2cd6534aSBarry Smith static Vec auglydd = 0,auglyoo = 0; /* work vectors used to scale the two parts of the local matrix */ 196*2cd6534aSBarry Smith 197*2cd6534aSBarry Smith 198*2cd6534aSBarry Smith #undef __FUNCT__ 199*2cd6534aSBarry Smith #define __FUNCT__ "MatMPIAIJDiagonalScaleLocalSetUp" 200*2cd6534aSBarry Smith int MatMPIAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale) 201*2cd6534aSBarry Smith { 202*2cd6534aSBarry Smith Mat_MPIAIJ *ina = (Mat_MPIAIJ*) inA->data; /*access private part of matrix */ 203*2cd6534aSBarry Smith Mat_SeqAIJ *A = (Mat_SeqAIJ*)ina->A->data; 204*2cd6534aSBarry Smith Mat_SeqAIJ *B = (Mat_SeqAIJ*)ina->B->data; 205*2cd6534aSBarry Smith int ierr,i,n,nt,cstart,cend,no,*garray = ina->garray,*lindices; 206*2cd6534aSBarry Smith int *r_rmapd,*r_rmapo; 207*2cd6534aSBarry Smith 208*2cd6534aSBarry Smith PetscFunctionBegin; 209*2cd6534aSBarry Smith ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr); 210*2cd6534aSBarry Smith ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr); 211*2cd6534aSBarry Smith ierr = PetscMalloc((inA->mapping->n+1)*sizeof(int),&r_rmapd);CHKERRQ(ierr); 212*2cd6534aSBarry Smith ierr = PetscMemzero(r_rmapd,inA->mapping->n*sizeof(int));CHKERRQ(ierr); 213*2cd6534aSBarry Smith nt = 0; 214*2cd6534aSBarry Smith for (i=0; i<inA->mapping->n; i++) { 215*2cd6534aSBarry Smith if (inA->mapping->indices[i] >= cstart && inA->mapping->indices[i] < cend) { 216*2cd6534aSBarry Smith nt++; 217*2cd6534aSBarry Smith r_rmapd[i] = inA->mapping->indices[i] + 1; 218*2cd6534aSBarry Smith } 219*2cd6534aSBarry Smith } 220*2cd6534aSBarry Smith if (nt != n) SETERRQ2(1,"Hmm nt %d n %d",nt,n); 221*2cd6534aSBarry Smith ierr = PetscMalloc((n+1)*sizeof(int),&auglyrmapd);CHKERRQ(ierr); 222*2cd6534aSBarry Smith for (i=0; i<inA->mapping->n; i++) { 223*2cd6534aSBarry Smith if (r_rmapd[i]){ 224*2cd6534aSBarry Smith auglyrmapd[(r_rmapd[i]-1)-cstart] = i; 225*2cd6534aSBarry Smith } 226*2cd6534aSBarry Smith } 227*2cd6534aSBarry Smith ierr = PetscFree(r_rmapd);CHKERRQ(ierr); 228*2cd6534aSBarry Smith ierr = VecCreateSeq(PETSC_COMM_SELF,n,&auglydd);CHKERRQ(ierr); 229*2cd6534aSBarry Smith 230*2cd6534aSBarry Smith ierr = PetscMalloc((inA->N+1)*sizeof(int),&lindices);CHKERRQ(ierr); 231*2cd6534aSBarry Smith ierr = PetscMemzero(lindices,inA->N*sizeof(int));CHKERRQ(ierr); 232*2cd6534aSBarry Smith for (i=0; i<ina->B->n; i++) { 233*2cd6534aSBarry Smith lindices[garray[i]] = i+1; 234*2cd6534aSBarry Smith } 235*2cd6534aSBarry Smith no = inA->mapping->n - nt; 236*2cd6534aSBarry Smith ierr = PetscMalloc((inA->mapping->n+1)*sizeof(int),&r_rmapo);CHKERRQ(ierr); 237*2cd6534aSBarry Smith ierr = PetscMemzero(r_rmapo,inA->mapping->n*sizeof(int));CHKERRQ(ierr); 238*2cd6534aSBarry Smith nt = 0; 239*2cd6534aSBarry Smith for (i=0; i<inA->mapping->n; i++) { 240*2cd6534aSBarry Smith if (lindices[inA->mapping->indices[i]]) { 241*2cd6534aSBarry Smith nt++; 242*2cd6534aSBarry Smith r_rmapo[i] = lindices[inA->mapping->indices[i]]; 243*2cd6534aSBarry Smith } 244*2cd6534aSBarry Smith } 245*2cd6534aSBarry Smith if (nt > no) SETERRQ2(1,"Hmm nt %d no %d",nt,n); 246*2cd6534aSBarry Smith ierr = PetscFree(lindices);CHKERRQ(ierr); 247*2cd6534aSBarry Smith ierr = PetscMalloc((nt+1)*sizeof(int),&auglyrmapo);CHKERRQ(ierr); 248*2cd6534aSBarry Smith for (i=0; i<inA->mapping->n; i++) { 249*2cd6534aSBarry Smith if (r_rmapo[i]){ 250*2cd6534aSBarry Smith auglyrmapo[(r_rmapo[i]-1)] = i; 251*2cd6534aSBarry Smith } 252*2cd6534aSBarry Smith } 253*2cd6534aSBarry Smith ierr = PetscFree(r_rmapo);CHKERRQ(ierr); 254*2cd6534aSBarry Smith ierr = VecCreateSeq(PETSC_COMM_SELF,nt,&auglyoo);CHKERRQ(ierr); 255*2cd6534aSBarry Smith 256*2cd6534aSBarry Smith PetscFunctionReturn(0); 257*2cd6534aSBarry Smith } 258*2cd6534aSBarry Smith 259*2cd6534aSBarry Smith #undef __FUNCT__ 260*2cd6534aSBarry Smith #define __FUNCT__ "MatMPIAIJDiagonalScaleLocal" 261*2cd6534aSBarry Smith int MatMPIAIJDiagonalScaleLocal(Mat A,Vec scale) 262*2cd6534aSBarry Smith { 263*2cd6534aSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; /*access private part of matrix */ 264*2cd6534aSBarry Smith int ierr,n,i; 265*2cd6534aSBarry Smith PetscScalar *d,*o,*s; 266*2cd6534aSBarry Smith 267*2cd6534aSBarry Smith PetscFunctionBegin; 268*2cd6534aSBarry Smith if (!auglyrmapd) { 269*2cd6534aSBarry Smith ierr = MatMPIAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr); 270*2cd6534aSBarry Smith } 271*2cd6534aSBarry Smith 272*2cd6534aSBarry Smith ierr = VecGetArray(scale,&s);CHKERRQ(ierr); 273*2cd6534aSBarry Smith 274*2cd6534aSBarry Smith ierr = VecGetLocalSize(auglydd,&n);CHKERRQ(ierr); 275*2cd6534aSBarry Smith ierr = VecGetArray(auglydd,&d);CHKERRQ(ierr); 276*2cd6534aSBarry Smith for (i=0; i<n; i++) { 277*2cd6534aSBarry Smith d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ 278*2cd6534aSBarry Smith } 279*2cd6534aSBarry Smith ierr = VecRestoreArray(auglydd,&d);CHKERRQ(ierr); 280*2cd6534aSBarry Smith /* column scale "diagonal" portion of local matrix */ 281*2cd6534aSBarry Smith ierr = MatDiagonalScale(a->A,PETSC_NULL,auglydd);CHKERRQ(ierr); 282*2cd6534aSBarry Smith 283*2cd6534aSBarry Smith ierr = VecGetLocalSize(auglyoo,&n);CHKERRQ(ierr); 284*2cd6534aSBarry Smith ierr = VecGetArray(auglyoo,&o);CHKERRQ(ierr); 285*2cd6534aSBarry Smith for (i=0; i<n; i++) { 286*2cd6534aSBarry Smith o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ 287*2cd6534aSBarry Smith } 288*2cd6534aSBarry Smith ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr); 289*2cd6534aSBarry Smith ierr = VecRestoreArray(auglyoo,&o);CHKERRQ(ierr); 290*2cd6534aSBarry Smith /* column scale "off-diagonal" portion of local matrix */ 291*2cd6534aSBarry Smith ierr = MatDiagonalScale(a->B,PETSC_NULL,auglyoo);CHKERRQ(ierr); 292*2cd6534aSBarry Smith 293*2cd6534aSBarry Smith PetscFunctionReturn(0); 294*2cd6534aSBarry Smith } 295*2cd6534aSBarry Smith 296*2cd6534aSBarry Smith 297*2cd6534aSBarry Smith 29848b35521SBarry Smith 299