173f4d377SMatthew Knepley /*$Id: mmaij.c,v 1.59 2001/08/07 03:02:49 balay Exp $*/ 28c79f6d3SBarry Smith 38c79f6d3SBarry Smith /* 48c79f6d3SBarry Smith Support for the parallel AIJ matrix vector multiply 58c79f6d3SBarry Smith */ 670f55243SBarry Smith #include "src/mat/impls/aij/mpi/mpiaij.h" 7f5eb4b81SSatish Balay #include "src/vec/vecimpl.h" 88c79f6d3SBarry Smith 94a2ae208SSatish Balay #undef __FUNCT__ 104a2ae208SSatish Balay #define __FUNCT__ "MatSetUpMultiply_MPIAIJ" 1144a69424SLois Curfman McInnes int MatSetUpMultiply_MPIAIJ(Mat mat) 128c79f6d3SBarry Smith { 1344a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 14ec8511deSBarry Smith Mat_SeqAIJ *B = (Mat_SeqAIJ*)(aij->B->data); 15273d9f13SBarry Smith int N = mat->N,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray; 161eb62cbbSBarry Smith IS from,to; 171eb62cbbSBarry Smith Vec gvec; 18aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 190f5bd95cSBarry Smith PetscTable gid1_lid1; 200f5bd95cSBarry Smith PetscTablePosition tpos; 212066d6f7SSatish Balay int gid,lid; 222066d6f7SSatish Balay #endif 232066d6f7SSatish Balay 243a40ed3dSBarry Smith PetscFunctionBegin; 252066d6f7SSatish Balay 26aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 272066d6f7SSatish Balay /* use a table - Mark Adams (this has not been tested with "shift") */ 28273d9f13SBarry Smith ierr = PetscTableCreate(aij->B->m,&gid1_lid1);CHKERRQ(ierr); 29273d9f13SBarry Smith for (i=0; i<aij->B->m; i++) { 302066d6f7SSatish Balay for (j=0; j<B->ilen[i]; j++) { 3108c73f0fSSatish Balay int data,gid1 = aj[B->i[i] + shift + j] + 1 + shift; 320f5bd95cSBarry Smith ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 33fa46199cSSatish Balay if (!data) { 342066d6f7SSatish Balay /* one based table */ 350f5bd95cSBarry Smith ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr); 362066d6f7SSatish Balay } 372066d6f7SSatish Balay } 382066d6f7SSatish Balay } 392066d6f7SSatish Balay /* form array of columns we need */ 40b0a32e0cSBarry Smith ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr); 410f5bd95cSBarry Smith ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 422066d6f7SSatish Balay while (tpos) { 430f5bd95cSBarry Smith ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 44b0a32e0cSBarry Smith gid--; 45b0a32e0cSBarry Smith lid--; 462066d6f7SSatish Balay garray[lid] = gid; 472066d6f7SSatish Balay } 480064e2bbSSatish Balay ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); /* sort, and rebuild */ 490f5bd95cSBarry Smith ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 502066d6f7SSatish Balay for (i=0; i<ec; i++) { 510f5bd95cSBarry Smith ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr); 522066d6f7SSatish Balay } 532066d6f7SSatish Balay /* compact out the extra columns in B */ 54273d9f13SBarry Smith for (i=0; i<aij->B->m; i++) { 552066d6f7SSatish Balay for (j=0; j<B->ilen[i]; j++) { 5608c73f0fSSatish Balay int gid1 = aj[B->i[i] + shift + j] + 1 + shift; 570f5bd95cSBarry Smith ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 58fa46199cSSatish Balay lid --; 5908c73f0fSSatish Balay aj[B->i[i] + shift + j] = lid - shift; 602066d6f7SSatish Balay } 612066d6f7SSatish Balay } 62273d9f13SBarry Smith aij->B->n = aij->B->N = ec; 630f5bd95cSBarry Smith ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr); 642066d6f7SSatish Balay /* Mark Adams */ 652066d6f7SSatish Balay #else 668c79f6d3SBarry Smith /* For the first stab we make an array as long as the number of columns */ 671eb62cbbSBarry Smith /* mark those columns that are in aij->B */ 68b0a32e0cSBarry Smith ierr = PetscMalloc((N+1)*sizeof(int),&indices);CHKERRQ(ierr); 69549d3d68SSatish Balay ierr = PetscMemzero(indices,N*sizeof(int));CHKERRQ(ierr); 70273d9f13SBarry Smith for (i=0; i<aij->B->m; i++) { 71d6dfbf8fSBarry Smith for (j=0; j<B->ilen[i]; j++) { 72*bfec09a0SHong Zhang if (!indices[aj[B->i[i] + j] ]) ec++; 73*bfec09a0SHong Zhang indices[aj[B->i[i] + j] ] = 1; 74416022c9SBarry Smith } 751eb62cbbSBarry Smith } 768c79f6d3SBarry Smith 771eb62cbbSBarry Smith /* form array of columns we need */ 78b0a32e0cSBarry Smith ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr); 791eb62cbbSBarry Smith ec = 0; 801eb62cbbSBarry Smith for (i=0; i<N; i++) { 811eb62cbbSBarry Smith if (indices[i]) garray[ec++] = i; 821eb62cbbSBarry Smith } 831eb62cbbSBarry Smith 841eb62cbbSBarry Smith /* make indices now point into garray */ 851eb62cbbSBarry Smith for (i=0; i<ec; i++) { 86*bfec09a0SHong Zhang indices[garray[i]] = i; 871eb62cbbSBarry Smith } 881eb62cbbSBarry Smith 891eb62cbbSBarry Smith /* compact out the extra columns in B */ 90273d9f13SBarry Smith for (i=0; i<aij->B->m; i++) { 91d6dfbf8fSBarry Smith for (j=0; j<B->ilen[i]; j++) { 92*bfec09a0SHong Zhang aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 931eb62cbbSBarry Smith } 94d6dfbf8fSBarry Smith } 95273d9f13SBarry Smith aij->B->n = aij->B->N = ec; 96606d414cSSatish Balay ierr = PetscFree(indices);CHKERRQ(ierr); 972066d6f7SSatish Balay #endif 981eb62cbbSBarry Smith /* create local vector that is used to scatter into */ 99029af93fSBarry Smith ierr = VecCreateSeq(PETSC_COMM_SELF,ec,&aij->lvec);CHKERRQ(ierr); 1001eb62cbbSBarry Smith 101d6dfbf8fSBarry Smith /* create two temporary Index sets for build scatter gather */ 102b9b97703SBarry Smith ierr = ISCreateGeneral(mat->comm,ec,garray,&from);CHKERRQ(ierr); 103029af93fSBarry Smith ierr = ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to);CHKERRQ(ierr); 1041eb62cbbSBarry Smith 1051eb62cbbSBarry Smith /* create temporary global vector to generate scatter context */ 1061eb62cbbSBarry Smith /* this is inefficient, but otherwise we must do either 1071eb62cbbSBarry Smith 1) save garray until the first actual scatter when the vector is known or 1081eb62cbbSBarry Smith 2) have another way of generating a scatter context without a vector.*/ 109273d9f13SBarry Smith ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr); 1101eb62cbbSBarry Smith 1112d336d48SLois Curfman McInnes /* generate the scatter context */ 11208480c60SBarry Smith ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx);CHKERRQ(ierr); 113b0a32e0cSBarry Smith PetscLogObjectParent(mat,aij->Mvctx); 114b0a32e0cSBarry Smith PetscLogObjectParent(mat,aij->lvec); 115b0a32e0cSBarry Smith PetscLogObjectParent(mat,from); 116b0a32e0cSBarry Smith PetscLogObjectParent(mat,to); 1179e25ed09SBarry Smith aij->garray = garray; 118b0a32e0cSBarry Smith PetscLogObjectMemory(mat,(ec+1)*sizeof(int)); 11978b31e54SBarry Smith ierr = ISDestroy(from);CHKERRQ(ierr); 12078b31e54SBarry Smith ierr = ISDestroy(to);CHKERRQ(ierr); 121888f2ed8SSatish Balay ierr = VecDestroy(gvec);CHKERRQ(ierr); 1223a40ed3dSBarry Smith PetscFunctionReturn(0); 1238c79f6d3SBarry Smith } 1249e25ed09SBarry Smith 1259e25ed09SBarry Smith 1264a2ae208SSatish Balay #undef __FUNCT__ 1274a2ae208SSatish Balay #define __FUNCT__ "DisAssemble_MPIAIJ" 1282493cbb0SBarry Smith /* 1292493cbb0SBarry Smith Takes the local part of an already assembled MPIAIJ matrix 1302493cbb0SBarry Smith and disassembles it. This is to allow new nonzeros into the matrix 1312493cbb0SBarry Smith that require more communication in the matrix vector multiply. 1322493cbb0SBarry Smith Thus certain data-structures must be rebuilt. 1332493cbb0SBarry Smith 1342493cbb0SBarry Smith Kind of slow! But that's what application programmers get when 1352493cbb0SBarry Smith they are sloppy. 1362493cbb0SBarry Smith */ 1372493cbb0SBarry Smith int DisAssemble_MPIAIJ(Mat A) 1382493cbb0SBarry Smith { 1392493cbb0SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 1402493cbb0SBarry Smith Mat B = aij->B,Bnew; 141ec8511deSBarry Smith Mat_SeqAIJ *Baij = (Mat_SeqAIJ*)B->data; 142273d9f13SBarry Smith int ierr,i,j,m = B->m,n = A->N,col,ct = 0,*garray = aij->garray; 143*bfec09a0SHong Zhang int *nz,ec; 14487828ca2SBarry Smith PetscScalar v; 1452493cbb0SBarry Smith 1463a40ed3dSBarry Smith PetscFunctionBegin; 1472493cbb0SBarry Smith /* free stuff related to matrix-vec multiply */ 148b0a32e0cSBarry Smith ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 1492493cbb0SBarry Smith ierr = VecDestroy(aij->lvec);CHKERRQ(ierr); aij->lvec = 0; 15008480c60SBarry Smith ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr); aij->Mvctx = 0; 151464493b3SBarry Smith if (aij->colmap) { 152aa482453SBarry Smith #if defined (PETSC_USE_CTABLE) 1530f5bd95cSBarry Smith ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr); 1540f5bd95cSBarry Smith aij->colmap = 0; 1552066d6f7SSatish Balay #else 156606d414cSSatish Balay ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 157606d414cSSatish Balay aij->colmap = 0; 158b0a32e0cSBarry Smith PetscLogObjectMemory(A,-aij->B->n*sizeof(int)); 1592066d6f7SSatish Balay #endif 160464493b3SBarry Smith } 1612493cbb0SBarry Smith 1622493cbb0SBarry Smith /* make sure that B is assembled so we can access its values */ 1636d4a8577SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 164fe2f2677SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1652493cbb0SBarry Smith 1662493cbb0SBarry Smith /* invent new B and copy stuff over */ 167b0a32e0cSBarry Smith ierr = PetscMalloc((m+1)*sizeof(int),&nz);CHKERRQ(ierr); 16848b35521SBarry Smith for (i=0; i<m; i++) { 16948b35521SBarry Smith nz[i] = Baij->i[i+1] - Baij->i[i]; 17048b35521SBarry Smith } 171029af93fSBarry Smith ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,0,nz,&Bnew);CHKERRQ(ierr); 172606d414cSSatish Balay ierr = PetscFree(nz);CHKERRQ(ierr); 1732493cbb0SBarry Smith for (i=0; i<m; i++) { 174*bfec09a0SHong Zhang for (j=Baij->i[i]; j<Baij->i[i+1]; j++) { 175*bfec09a0SHong Zhang col = garray[Baij->j[ct]]; 1762493cbb0SBarry Smith v = Baij->a[ct++]; 17783271157SBarry Smith ierr = MatSetValues(Bnew,1,&i,1,&col,&v,B->insertmode);CHKERRQ(ierr); 1782493cbb0SBarry Smith } 1792493cbb0SBarry Smith } 180606d414cSSatish Balay ierr = PetscFree(aij->garray);CHKERRQ(ierr); 181606d414cSSatish Balay aij->garray = 0; 182b0a32e0cSBarry Smith PetscLogObjectMemory(A,-ec*sizeof(int)); 1832493cbb0SBarry Smith ierr = MatDestroy(B);CHKERRQ(ierr); 184b0a32e0cSBarry Smith PetscLogObjectParent(A,Bnew); 1852493cbb0SBarry Smith aij->B = Bnew; 186227d817aSBarry Smith A->was_assembled = PETSC_FALSE; 1873a40ed3dSBarry Smith PetscFunctionReturn(0); 1882493cbb0SBarry Smith } 1892493cbb0SBarry Smith 1902cd6534aSBarry Smith /* ugly stuff added for Glenn someday we should fix this up */ 1912cd6534aSBarry Smith 1922cd6534aSBarry Smith static int *auglyrmapd = 0,*auglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal" 1932cd6534aSBarry Smith parts of the local matrix */ 1942cd6534aSBarry Smith static Vec auglydd = 0,auglyoo = 0; /* work vectors used to scale the two parts of the local matrix */ 1952cd6534aSBarry Smith 1962cd6534aSBarry Smith 1972cd6534aSBarry Smith #undef __FUNCT__ 1982cd6534aSBarry Smith #define __FUNCT__ "MatMPIAIJDiagonalScaleLocalSetUp" 1992cd6534aSBarry Smith int MatMPIAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale) 2002cd6534aSBarry Smith { 2012cd6534aSBarry Smith Mat_MPIAIJ *ina = (Mat_MPIAIJ*) inA->data; /*access private part of matrix */ 2022cd6534aSBarry Smith int ierr,i,n,nt,cstart,cend,no,*garray = ina->garray,*lindices; 2032cd6534aSBarry Smith int *r_rmapd,*r_rmapo; 2042cd6534aSBarry Smith 2052cd6534aSBarry Smith PetscFunctionBegin; 2062cd6534aSBarry Smith ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr); 2072cd6534aSBarry Smith ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr); 2082cd6534aSBarry Smith ierr = PetscMalloc((inA->mapping->n+1)*sizeof(int),&r_rmapd);CHKERRQ(ierr); 2092cd6534aSBarry Smith ierr = PetscMemzero(r_rmapd,inA->mapping->n*sizeof(int));CHKERRQ(ierr); 2102cd6534aSBarry Smith nt = 0; 2112cd6534aSBarry Smith for (i=0; i<inA->mapping->n; i++) { 2122cd6534aSBarry Smith if (inA->mapping->indices[i] >= cstart && inA->mapping->indices[i] < cend) { 2132cd6534aSBarry Smith nt++; 2142cd6534aSBarry Smith r_rmapd[i] = inA->mapping->indices[i] + 1; 2152cd6534aSBarry Smith } 2162cd6534aSBarry Smith } 2172cd6534aSBarry Smith if (nt != n) SETERRQ2(1,"Hmm nt %d n %d",nt,n); 2182cd6534aSBarry Smith ierr = PetscMalloc((n+1)*sizeof(int),&auglyrmapd);CHKERRQ(ierr); 2192cd6534aSBarry Smith for (i=0; i<inA->mapping->n; i++) { 2202cd6534aSBarry Smith if (r_rmapd[i]){ 2212cd6534aSBarry Smith auglyrmapd[(r_rmapd[i]-1)-cstart] = i; 2222cd6534aSBarry Smith } 2232cd6534aSBarry Smith } 2242cd6534aSBarry Smith ierr = PetscFree(r_rmapd);CHKERRQ(ierr); 2252cd6534aSBarry Smith ierr = VecCreateSeq(PETSC_COMM_SELF,n,&auglydd);CHKERRQ(ierr); 2262cd6534aSBarry Smith 2272cd6534aSBarry Smith ierr = PetscMalloc((inA->N+1)*sizeof(int),&lindices);CHKERRQ(ierr); 2282cd6534aSBarry Smith ierr = PetscMemzero(lindices,inA->N*sizeof(int));CHKERRQ(ierr); 2292cd6534aSBarry Smith for (i=0; i<ina->B->n; i++) { 2302cd6534aSBarry Smith lindices[garray[i]] = i+1; 2312cd6534aSBarry Smith } 2322cd6534aSBarry Smith no = inA->mapping->n - nt; 2332cd6534aSBarry Smith ierr = PetscMalloc((inA->mapping->n+1)*sizeof(int),&r_rmapo);CHKERRQ(ierr); 2342cd6534aSBarry Smith ierr = PetscMemzero(r_rmapo,inA->mapping->n*sizeof(int));CHKERRQ(ierr); 2352cd6534aSBarry Smith nt = 0; 2362cd6534aSBarry Smith for (i=0; i<inA->mapping->n; i++) { 2372cd6534aSBarry Smith if (lindices[inA->mapping->indices[i]]) { 2382cd6534aSBarry Smith nt++; 2392cd6534aSBarry Smith r_rmapo[i] = lindices[inA->mapping->indices[i]]; 2402cd6534aSBarry Smith } 2412cd6534aSBarry Smith } 2422cd6534aSBarry Smith if (nt > no) SETERRQ2(1,"Hmm nt %d no %d",nt,n); 2432cd6534aSBarry Smith ierr = PetscFree(lindices);CHKERRQ(ierr); 2442cd6534aSBarry Smith ierr = PetscMalloc((nt+1)*sizeof(int),&auglyrmapo);CHKERRQ(ierr); 2452cd6534aSBarry Smith for (i=0; i<inA->mapping->n; i++) { 2462cd6534aSBarry Smith if (r_rmapo[i]){ 2472cd6534aSBarry Smith auglyrmapo[(r_rmapo[i]-1)] = i; 2482cd6534aSBarry Smith } 2492cd6534aSBarry Smith } 2502cd6534aSBarry Smith ierr = PetscFree(r_rmapo);CHKERRQ(ierr); 2512cd6534aSBarry Smith ierr = VecCreateSeq(PETSC_COMM_SELF,nt,&auglyoo);CHKERRQ(ierr); 2522cd6534aSBarry Smith 2532cd6534aSBarry Smith PetscFunctionReturn(0); 2542cd6534aSBarry Smith } 2552cd6534aSBarry Smith 2562cd6534aSBarry Smith #undef __FUNCT__ 2572cd6534aSBarry Smith #define __FUNCT__ "MatMPIAIJDiagonalScaleLocal" 2582cd6534aSBarry Smith int MatMPIAIJDiagonalScaleLocal(Mat A,Vec scale) 2592cd6534aSBarry Smith { 2602cd6534aSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; /*access private part of matrix */ 2612cd6534aSBarry Smith int ierr,n,i; 2622cd6534aSBarry Smith PetscScalar *d,*o,*s; 2632cd6534aSBarry Smith 2642cd6534aSBarry Smith PetscFunctionBegin; 2652cd6534aSBarry Smith if (!auglyrmapd) { 2662cd6534aSBarry Smith ierr = MatMPIAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr); 2672cd6534aSBarry Smith } 2682cd6534aSBarry Smith 2692cd6534aSBarry Smith ierr = VecGetArray(scale,&s);CHKERRQ(ierr); 2702cd6534aSBarry Smith 2712cd6534aSBarry Smith ierr = VecGetLocalSize(auglydd,&n);CHKERRQ(ierr); 2722cd6534aSBarry Smith ierr = VecGetArray(auglydd,&d);CHKERRQ(ierr); 2732cd6534aSBarry Smith for (i=0; i<n; i++) { 2742cd6534aSBarry Smith d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ 2752cd6534aSBarry Smith } 2762cd6534aSBarry Smith ierr = VecRestoreArray(auglydd,&d);CHKERRQ(ierr); 2772cd6534aSBarry Smith /* column scale "diagonal" portion of local matrix */ 2782cd6534aSBarry Smith ierr = MatDiagonalScale(a->A,PETSC_NULL,auglydd);CHKERRQ(ierr); 2792cd6534aSBarry Smith 2802cd6534aSBarry Smith ierr = VecGetLocalSize(auglyoo,&n);CHKERRQ(ierr); 2812cd6534aSBarry Smith ierr = VecGetArray(auglyoo,&o);CHKERRQ(ierr); 2822cd6534aSBarry Smith for (i=0; i<n; i++) { 2832cd6534aSBarry Smith o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ 2842cd6534aSBarry Smith } 2852cd6534aSBarry Smith ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr); 2862cd6534aSBarry Smith ierr = VecRestoreArray(auglyoo,&o);CHKERRQ(ierr); 2872cd6534aSBarry Smith /* column scale "off-diagonal" portion of local matrix */ 2882cd6534aSBarry Smith ierr = MatDiagonalScale(a->B,PETSC_NULL,auglyoo);CHKERRQ(ierr); 2892cd6534aSBarry Smith 2902cd6534aSBarry Smith PetscFunctionReturn(0); 2912cd6534aSBarry Smith } 2922cd6534aSBarry Smith 2932cd6534aSBarry Smith 2942cd6534aSBarry Smith 29548b35521SBarry Smith 296