1*8965ea79SLois Curfman McInnes #ifndef lint 2*8965ea79SLois Curfman McInnes static char vcid[] = "$Id: mpiaij.c,v 1.90 1995/10/17 02:21:15 curfman Exp curfman $"; 3*8965ea79SLois Curfman McInnes #endif 4*8965ea79SLois Curfman McInnes 5*8965ea79SLois Curfman McInnes #include "mpidense.h" 6*8965ea79SLois Curfman McInnes #include "vec/vecimpl.h" 7*8965ea79SLois Curfman McInnes #include "inline/spops.h" 8*8965ea79SLois Curfman McInnes 9*8965ea79SLois Curfman McInnes static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n, 10*8965ea79SLois Curfman McInnes int *idxn,Scalar *v,InsertMode addv) 11*8965ea79SLois Curfman McInnes { 12*8965ea79SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 13*8965ea79SLois Curfman McInnes Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data; 14*8965ea79SLois Curfman McInnes int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 15*8965ea79SLois Curfman McInnes int cstart = aij->cstart, cend = aij->cend,row,col; 16*8965ea79SLois Curfman McInnes int shift = C->indexshift; 17*8965ea79SLois Curfman McInnes 18*8965ea79SLois Curfman McInnes if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) { 19*8965ea79SLois Curfman McInnes SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds"); 20*8965ea79SLois Curfman McInnes } 21*8965ea79SLois Curfman McInnes aij->insertmode = addv; 22*8965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 23*8965ea79SLois Curfman McInnes if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row"); 24*8965ea79SLois Curfman McInnes if (idxm[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large"); 25*8965ea79SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 26*8965ea79SLois Curfman McInnes row = idxm[i] - rstart; 27*8965ea79SLois Curfman McInnes for ( j=0; j<n; j++ ) { 28*8965ea79SLois Curfman McInnes if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column"); 29*8965ea79SLois Curfman McInnes if (idxn[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large"); 30*8965ea79SLois Curfman McInnes if (idxn[j] >= cstart && idxn[j] < cend){ 31*8965ea79SLois Curfman McInnes col = idxn[j] - cstart; 32*8965ea79SLois Curfman McInnes ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr); 33*8965ea79SLois Curfman McInnes } 34*8965ea79SLois Curfman McInnes else { 35*8965ea79SLois Curfman McInnes if (aij->assembled) { 36*8965ea79SLois Curfman McInnes if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 37*8965ea79SLois Curfman McInnes col = aij->colmap[idxn[j]] + shift; 38*8965ea79SLois Curfman McInnes if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 39*8965ea79SLois Curfman McInnes ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 40*8965ea79SLois Curfman McInnes col = idxn[j]; 41*8965ea79SLois Curfman McInnes } 42*8965ea79SLois Curfman McInnes } 43*8965ea79SLois Curfman McInnes else col = idxn[j]; 44*8965ea79SLois Curfman McInnes ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr); 45*8965ea79SLois Curfman McInnes } 46*8965ea79SLois Curfman McInnes } 47*8965ea79SLois Curfman McInnes } 48*8965ea79SLois Curfman McInnes else { 49*8965ea79SLois Curfman McInnes ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv);CHKERRQ(ierr); 50*8965ea79SLois Curfman McInnes } 51*8965ea79SLois Curfman McInnes } 52*8965ea79SLois Curfman McInnes return 0; 53*8965ea79SLois Curfman McInnes } 54*8965ea79SLois Curfman McInnes 55*8965ea79SLois Curfman McInnes static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 56*8965ea79SLois Curfman McInnes { 57*8965ea79SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 58*8965ea79SLois Curfman McInnes MPI_Comm comm = mat->comm; 59*8965ea79SLois Curfman McInnes int size = aij->size, *owners = aij->rowners; 60*8965ea79SLois Curfman McInnes int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 61*8965ea79SLois Curfman McInnes MPI_Request *send_waits,*recv_waits; 62*8965ea79SLois Curfman McInnes int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 63*8965ea79SLois Curfman McInnes InsertMode addv; 64*8965ea79SLois Curfman McInnes Scalar *rvalues,*svalues; 65*8965ea79SLois Curfman McInnes 66*8965ea79SLois Curfman McInnes /* make sure all processors are either in INSERTMODE or ADDMODE */ 67*8965ea79SLois Curfman McInnes MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 68*8965ea79SLois Curfman McInnes if (addv == (ADD_VALUES|INSERT_VALUES)) { 69*8965ea79SLois Curfman McInnes SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added"); 70*8965ea79SLois Curfman McInnes } 71*8965ea79SLois Curfman McInnes aij->insertmode = addv; /* in case this processor had no cache */ 72*8965ea79SLois Curfman McInnes 73*8965ea79SLois Curfman McInnes /* first count number of contributors to each processor */ 74*8965ea79SLois Curfman McInnes nprocs = (int *) PETSCMALLOC( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 75*8965ea79SLois Curfman McInnes PetscZero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 76*8965ea79SLois Curfman McInnes owner = (int *) PETSCMALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 77*8965ea79SLois Curfman McInnes for ( i=0; i<aij->stash.n; i++ ) { 78*8965ea79SLois Curfman McInnes idx = aij->stash.idx[i]; 79*8965ea79SLois Curfman McInnes for ( j=0; j<size; j++ ) { 80*8965ea79SLois Curfman McInnes if (idx >= owners[j] && idx < owners[j+1]) { 81*8965ea79SLois Curfman McInnes nprocs[j]++; procs[j] = 1; owner[i] = j; break; 82*8965ea79SLois Curfman McInnes } 83*8965ea79SLois Curfman McInnes } 84*8965ea79SLois Curfman McInnes } 85*8965ea79SLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 86*8965ea79SLois Curfman McInnes 87*8965ea79SLois Curfman McInnes /* inform other processors of number of messages and max length*/ 88*8965ea79SLois Curfman McInnes work = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(work); 89*8965ea79SLois Curfman McInnes MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 90*8965ea79SLois Curfman McInnes nreceives = work[rank]; 91*8965ea79SLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 92*8965ea79SLois Curfman McInnes nmax = work[rank]; 93*8965ea79SLois Curfman McInnes PETSCFREE(work); 94*8965ea79SLois Curfman McInnes 95*8965ea79SLois Curfman McInnes /* post receives: 96*8965ea79SLois Curfman McInnes 1) each message will consist of ordered pairs 97*8965ea79SLois Curfman McInnes (global index,value) we store the global index as a double 98*8965ea79SLois Curfman McInnes to simplify the message passing. 99*8965ea79SLois Curfman McInnes 2) since we don't know how long each individual message is we 100*8965ea79SLois Curfman McInnes allocate the largest needed buffer for each receive. Potentially 101*8965ea79SLois Curfman McInnes this is a lot of wasted space. 102*8965ea79SLois Curfman McInnes 103*8965ea79SLois Curfman McInnes 104*8965ea79SLois Curfman McInnes This could be done better. 105*8965ea79SLois Curfman McInnes */ 106*8965ea79SLois Curfman McInnes rvalues = (Scalar *) PETSCMALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 107*8965ea79SLois Curfman McInnes CHKPTRQ(rvalues); 108*8965ea79SLois Curfman McInnes recv_waits = (MPI_Request *) PETSCMALLOC((nreceives+1)*sizeof(MPI_Request)); 109*8965ea79SLois Curfman McInnes CHKPTRQ(recv_waits); 110*8965ea79SLois Curfman McInnes for ( i=0; i<nreceives; i++ ) { 111*8965ea79SLois Curfman McInnes MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 112*8965ea79SLois Curfman McInnes comm,recv_waits+i); 113*8965ea79SLois Curfman McInnes } 114*8965ea79SLois Curfman McInnes 115*8965ea79SLois Curfman McInnes /* do sends: 116*8965ea79SLois Curfman McInnes 1) starts[i] gives the starting index in svalues for stuff going to 117*8965ea79SLois Curfman McInnes the ith processor 118*8965ea79SLois Curfman McInnes */ 119*8965ea79SLois Curfman McInnes svalues = (Scalar *) PETSCMALLOC(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 120*8965ea79SLois Curfman McInnes send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request)); 121*8965ea79SLois Curfman McInnes CHKPTRQ(send_waits); 122*8965ea79SLois Curfman McInnes starts = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(starts); 123*8965ea79SLois Curfman McInnes starts[0] = 0; 124*8965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 125*8965ea79SLois Curfman McInnes for ( i=0; i<aij->stash.n; i++ ) { 126*8965ea79SLois Curfman McInnes svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 127*8965ea79SLois Curfman McInnes svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 128*8965ea79SLois Curfman McInnes svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 129*8965ea79SLois Curfman McInnes } 130*8965ea79SLois Curfman McInnes PETSCFREE(owner); 131*8965ea79SLois Curfman McInnes starts[0] = 0; 132*8965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 133*8965ea79SLois Curfman McInnes count = 0; 134*8965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 135*8965ea79SLois Curfman McInnes if (procs[i]) { 136*8965ea79SLois Curfman McInnes MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 137*8965ea79SLois Curfman McInnes comm,send_waits+count++); 138*8965ea79SLois Curfman McInnes } 139*8965ea79SLois Curfman McInnes } 140*8965ea79SLois Curfman McInnes PETSCFREE(starts); PETSCFREE(nprocs); 141*8965ea79SLois Curfman McInnes 142*8965ea79SLois Curfman McInnes /* Free cache space */ 143*8965ea79SLois Curfman McInnes ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 144*8965ea79SLois Curfman McInnes 145*8965ea79SLois Curfman McInnes aij->svalues = svalues; aij->rvalues = rvalues; 146*8965ea79SLois Curfman McInnes aij->nsends = nsends; aij->nrecvs = nreceives; 147*8965ea79SLois Curfman McInnes aij->send_waits = send_waits; aij->recv_waits = recv_waits; 148*8965ea79SLois Curfman McInnes aij->rmax = nmax; 149*8965ea79SLois Curfman McInnes 150*8965ea79SLois Curfman McInnes return 0; 151*8965ea79SLois Curfman McInnes } 152*8965ea79SLois Curfman McInnes extern int MatSetUpMultiply_MPIAIJ(Mat); 153*8965ea79SLois Curfman McInnes 154*8965ea79SLois Curfman McInnes static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 155*8965ea79SLois Curfman McInnes { 156*8965ea79SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 157*8965ea79SLois Curfman McInnes Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data; 158*8965ea79SLois Curfman McInnes MPI_Status *send_status,recv_status; 159*8965ea79SLois Curfman McInnes int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 160*8965ea79SLois Curfman McInnes int row,col,other_disassembled,shift = C->indexshift; 161*8965ea79SLois Curfman McInnes Scalar *values,val; 162*8965ea79SLois Curfman McInnes InsertMode addv = aij->insertmode; 163*8965ea79SLois Curfman McInnes 164*8965ea79SLois Curfman McInnes /* wait on receives */ 165*8965ea79SLois Curfman McInnes while (count) { 166*8965ea79SLois Curfman McInnes MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 167*8965ea79SLois Curfman McInnes /* unpack receives into our local space */ 168*8965ea79SLois Curfman McInnes values = aij->rvalues + 3*imdex*aij->rmax; 169*8965ea79SLois Curfman McInnes MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 170*8965ea79SLois Curfman McInnes n = n/3; 171*8965ea79SLois Curfman McInnes for ( i=0; i<n; i++ ) { 172*8965ea79SLois Curfman McInnes row = (int) PETSCREAL(values[3*i]) - aij->rstart; 173*8965ea79SLois Curfman McInnes col = (int) PETSCREAL(values[3*i+1]); 174*8965ea79SLois Curfman McInnes val = values[3*i+2]; 175*8965ea79SLois Curfman McInnes if (col >= aij->cstart && col < aij->cend) { 176*8965ea79SLois Curfman McInnes col -= aij->cstart; 177*8965ea79SLois Curfman McInnes MatSetValues(aij->A,1,&row,1,&col,&val,addv); 178*8965ea79SLois Curfman McInnes } 179*8965ea79SLois Curfman McInnes else { 180*8965ea79SLois Curfman McInnes if (aij->assembled) { 181*8965ea79SLois Curfman McInnes if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} 182*8965ea79SLois Curfman McInnes col = aij->colmap[col] + shift; 183*8965ea79SLois Curfman McInnes if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 184*8965ea79SLois Curfman McInnes ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 185*8965ea79SLois Curfman McInnes col = (int) PETSCREAL(values[3*i+1]); 186*8965ea79SLois Curfman McInnes } 187*8965ea79SLois Curfman McInnes } 188*8965ea79SLois Curfman McInnes MatSetValues(aij->B,1,&row,1,&col,&val,addv); 189*8965ea79SLois Curfman McInnes } 190*8965ea79SLois Curfman McInnes } 191*8965ea79SLois Curfman McInnes count--; 192*8965ea79SLois Curfman McInnes } 193*8965ea79SLois Curfman McInnes PETSCFREE(aij->recv_waits); PETSCFREE(aij->rvalues); 194*8965ea79SLois Curfman McInnes 195*8965ea79SLois Curfman McInnes /* wait on sends */ 196*8965ea79SLois Curfman McInnes if (aij->nsends) { 197*8965ea79SLois Curfman McInnes send_status = (MPI_Status *) PETSCMALLOC(aij->nsends*sizeof(MPI_Status)); 198*8965ea79SLois Curfman McInnes CHKPTRQ(send_status); 199*8965ea79SLois Curfman McInnes MPI_Waitall(aij->nsends,aij->send_waits,send_status); 200*8965ea79SLois Curfman McInnes PETSCFREE(send_status); 201*8965ea79SLois Curfman McInnes } 202*8965ea79SLois Curfman McInnes PETSCFREE(aij->send_waits); PETSCFREE(aij->svalues); 203*8965ea79SLois Curfman McInnes 204*8965ea79SLois Curfman McInnes aij->insertmode = NOT_SET_VALUES; 205*8965ea79SLois Curfman McInnes ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 206*8965ea79SLois Curfman McInnes ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 207*8965ea79SLois Curfman McInnes 208*8965ea79SLois Curfman McInnes /* determine if any processor has disassembled, if so we must 209*8965ea79SLois Curfman McInnes also disassemble ourselfs, in order that we may reassemble. */ 210*8965ea79SLois Curfman McInnes MPI_Allreduce(&aij->assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 211*8965ea79SLois Curfman McInnes if (aij->assembled && !other_disassembled) { 212*8965ea79SLois Curfman McInnes ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 213*8965ea79SLois Curfman McInnes } 214*8965ea79SLois Curfman McInnes 215*8965ea79SLois Curfman McInnes if (!aij->assembled && mode == FINAL_ASSEMBLY) { 216*8965ea79SLois Curfman McInnes ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 217*8965ea79SLois Curfman McInnes aij->assembled = 1; 218*8965ea79SLois Curfman McInnes } 219*8965ea79SLois Curfman McInnes ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 220*8965ea79SLois Curfman McInnes ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 221*8965ea79SLois Curfman McInnes 222*8965ea79SLois Curfman McInnes return 0; 223*8965ea79SLois Curfman McInnes } 224*8965ea79SLois Curfman McInnes 225*8965ea79SLois Curfman McInnes static int MatZeroEntries_MPIAIJ(Mat A) 226*8965ea79SLois Curfman McInnes { 227*8965ea79SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 228*8965ea79SLois Curfman McInnes int ierr; 229*8965ea79SLois Curfman McInnes ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 230*8965ea79SLois Curfman McInnes ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 231*8965ea79SLois Curfman McInnes return 0; 232*8965ea79SLois Curfman McInnes } 233*8965ea79SLois Curfman McInnes 234*8965ea79SLois Curfman McInnes /* the code does not do the diagonal entries correctly unless the 235*8965ea79SLois Curfman McInnes matrix is square and the column and row owerships are identical. 236*8965ea79SLois Curfman McInnes This is a BUG. The only way to fix it seems to be to access 237*8965ea79SLois Curfman McInnes aij->A and aij->B directly and not through the MatZeroRows() 238*8965ea79SLois Curfman McInnes routine. 239*8965ea79SLois Curfman McInnes */ 240*8965ea79SLois Curfman McInnes static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 241*8965ea79SLois Curfman McInnes { 242*8965ea79SLois Curfman McInnes Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 243*8965ea79SLois Curfman McInnes int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 244*8965ea79SLois Curfman McInnes int *procs,*nprocs,j,found,idx,nsends,*work; 245*8965ea79SLois Curfman McInnes int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 246*8965ea79SLois Curfman McInnes int *rvalues,tag = A->tag,count,base,slen,n,*source; 247*8965ea79SLois Curfman McInnes int *lens,imdex,*lrows,*values; 248*8965ea79SLois Curfman McInnes MPI_Comm comm = A->comm; 249*8965ea79SLois Curfman McInnes MPI_Request *send_waits,*recv_waits; 250*8965ea79SLois Curfman McInnes MPI_Status recv_status,*send_status; 251*8965ea79SLois Curfman McInnes IS istmp; 252*8965ea79SLois Curfman McInnes 253*8965ea79SLois Curfman McInnes if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIAIJ:Must assemble matrix"); 254*8965ea79SLois Curfman McInnes ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr); 255*8965ea79SLois Curfman McInnes ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 256*8965ea79SLois Curfman McInnes 257*8965ea79SLois Curfman McInnes /* first count number of contributors to each processor */ 258*8965ea79SLois Curfman McInnes nprocs = (int *) PETSCMALLOC( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 259*8965ea79SLois Curfman McInnes PetscZero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 260*8965ea79SLois Curfman McInnes owner = (int *) PETSCMALLOC((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 261*8965ea79SLois Curfman McInnes for ( i=0; i<N; i++ ) { 262*8965ea79SLois Curfman McInnes idx = rows[i]; 263*8965ea79SLois Curfman McInnes found = 0; 264*8965ea79SLois Curfman McInnes for ( j=0; j<size; j++ ) { 265*8965ea79SLois Curfman McInnes if (idx >= owners[j] && idx < owners[j+1]) { 266*8965ea79SLois Curfman McInnes nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 267*8965ea79SLois Curfman McInnes } 268*8965ea79SLois Curfman McInnes } 269*8965ea79SLois Curfman McInnes if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range"); 270*8965ea79SLois Curfman McInnes } 271*8965ea79SLois Curfman McInnes nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 272*8965ea79SLois Curfman McInnes 273*8965ea79SLois Curfman McInnes /* inform other processors of number of messages and max length*/ 274*8965ea79SLois Curfman McInnes work = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(work); 275*8965ea79SLois Curfman McInnes MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm); 276*8965ea79SLois Curfman McInnes nrecvs = work[rank]; 277*8965ea79SLois Curfman McInnes MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 278*8965ea79SLois Curfman McInnes nmax = work[rank]; 279*8965ea79SLois Curfman McInnes PETSCFREE(work); 280*8965ea79SLois Curfman McInnes 281*8965ea79SLois Curfman McInnes /* post receives: */ 282*8965ea79SLois Curfman McInnes rvalues = (int *) PETSCMALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */ 283*8965ea79SLois Curfman McInnes CHKPTRQ(rvalues); 284*8965ea79SLois Curfman McInnes recv_waits = (MPI_Request *) PETSCMALLOC((nrecvs+1)*sizeof(MPI_Request)); 285*8965ea79SLois Curfman McInnes CHKPTRQ(recv_waits); 286*8965ea79SLois Curfman McInnes for ( i=0; i<nrecvs; i++ ) { 287*8965ea79SLois Curfman McInnes MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i); 288*8965ea79SLois Curfman McInnes } 289*8965ea79SLois Curfman McInnes 290*8965ea79SLois Curfman McInnes /* do sends: 291*8965ea79SLois Curfman McInnes 1) starts[i] gives the starting index in svalues for stuff going to 292*8965ea79SLois Curfman McInnes the ith processor 293*8965ea79SLois Curfman McInnes */ 294*8965ea79SLois Curfman McInnes svalues = (int *) PETSCMALLOC( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 295*8965ea79SLois Curfman McInnes send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request)); 296*8965ea79SLois Curfman McInnes CHKPTRQ(send_waits); 297*8965ea79SLois Curfman McInnes starts = (int *) PETSCMALLOC( (size+1)*sizeof(int) ); CHKPTRQ(starts); 298*8965ea79SLois Curfman McInnes starts[0] = 0; 299*8965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 300*8965ea79SLois Curfman McInnes for ( i=0; i<N; i++ ) { 301*8965ea79SLois Curfman McInnes svalues[starts[owner[i]]++] = rows[i]; 302*8965ea79SLois Curfman McInnes } 303*8965ea79SLois Curfman McInnes ISRestoreIndices(is,&rows); 304*8965ea79SLois Curfman McInnes 305*8965ea79SLois Curfman McInnes starts[0] = 0; 306*8965ea79SLois Curfman McInnes for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 307*8965ea79SLois Curfman McInnes count = 0; 308*8965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 309*8965ea79SLois Curfman McInnes if (procs[i]) { 310*8965ea79SLois Curfman McInnes MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++); 311*8965ea79SLois Curfman McInnes } 312*8965ea79SLois Curfman McInnes } 313*8965ea79SLois Curfman McInnes PETSCFREE(starts); 314*8965ea79SLois Curfman McInnes 315*8965ea79SLois Curfman McInnes base = owners[rank]; 316*8965ea79SLois Curfman McInnes 317*8965ea79SLois Curfman McInnes /* wait on receives */ 318*8965ea79SLois Curfman McInnes lens = (int *) PETSCMALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 319*8965ea79SLois Curfman McInnes source = lens + nrecvs; 320*8965ea79SLois Curfman McInnes count = nrecvs; slen = 0; 321*8965ea79SLois Curfman McInnes while (count) { 322*8965ea79SLois Curfman McInnes MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status); 323*8965ea79SLois Curfman McInnes /* unpack receives into our local space */ 324*8965ea79SLois Curfman McInnes MPI_Get_count(&recv_status,MPI_INT,&n); 325*8965ea79SLois Curfman McInnes source[imdex] = recv_status.MPI_SOURCE; 326*8965ea79SLois Curfman McInnes lens[imdex] = n; 327*8965ea79SLois Curfman McInnes slen += n; 328*8965ea79SLois Curfman McInnes count--; 329*8965ea79SLois Curfman McInnes } 330*8965ea79SLois Curfman McInnes PETSCFREE(recv_waits); 331*8965ea79SLois Curfman McInnes 332*8965ea79SLois Curfman McInnes /* move the data into the send scatter */ 333*8965ea79SLois Curfman McInnes lrows = (int *) PETSCMALLOC( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 334*8965ea79SLois Curfman McInnes count = 0; 335*8965ea79SLois Curfman McInnes for ( i=0; i<nrecvs; i++ ) { 336*8965ea79SLois Curfman McInnes values = rvalues + i*nmax; 337*8965ea79SLois Curfman McInnes for ( j=0; j<lens[i]; j++ ) { 338*8965ea79SLois Curfman McInnes lrows[count++] = values[j] - base; 339*8965ea79SLois Curfman McInnes } 340*8965ea79SLois Curfman McInnes } 341*8965ea79SLois Curfman McInnes PETSCFREE(rvalues); PETSCFREE(lens); 342*8965ea79SLois Curfman McInnes PETSCFREE(owner); PETSCFREE(nprocs); 343*8965ea79SLois Curfman McInnes 344*8965ea79SLois Curfman McInnes /* actually zap the local rows */ 345*8965ea79SLois Curfman McInnes ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 346*8965ea79SLois Curfman McInnes PLogObjectParent(A,istmp); 347*8965ea79SLois Curfman McInnes PETSCFREE(lrows); 348*8965ea79SLois Curfman McInnes ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 349*8965ea79SLois Curfman McInnes ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 350*8965ea79SLois Curfman McInnes ierr = ISDestroy(istmp); CHKERRQ(ierr); 351*8965ea79SLois Curfman McInnes 352*8965ea79SLois Curfman McInnes /* wait on sends */ 353*8965ea79SLois Curfman McInnes if (nsends) { 354*8965ea79SLois Curfman McInnes send_status = (MPI_Status *) PETSCMALLOC(nsends*sizeof(MPI_Status)); 355*8965ea79SLois Curfman McInnes CHKPTRQ(send_status); 356*8965ea79SLois Curfman McInnes MPI_Waitall(nsends,send_waits,send_status); 357*8965ea79SLois Curfman McInnes PETSCFREE(send_status); 358*8965ea79SLois Curfman McInnes } 359*8965ea79SLois Curfman McInnes PETSCFREE(send_waits); PETSCFREE(svalues); 360*8965ea79SLois Curfman McInnes 361*8965ea79SLois Curfman McInnes return 0; 362*8965ea79SLois Curfman McInnes } 363*8965ea79SLois Curfman McInnes 364*8965ea79SLois Curfman McInnes static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 365*8965ea79SLois Curfman McInnes { 366*8965ea79SLois Curfman McInnes Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 367*8965ea79SLois Curfman McInnes int ierr; 368*8965ea79SLois Curfman McInnes 369*8965ea79SLois Curfman McInnes if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix"); 370*8965ea79SLois Curfman McInnes ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 371*8965ea79SLois Curfman McInnes ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr); 372*8965ea79SLois Curfman McInnes ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 373*8965ea79SLois Curfman McInnes ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 374*8965ea79SLois Curfman McInnes return 0; 375*8965ea79SLois Curfman McInnes } 376*8965ea79SLois Curfman McInnes 377*8965ea79SLois Curfman McInnes static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 378*8965ea79SLois Curfman McInnes { 379*8965ea79SLois Curfman McInnes Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 380*8965ea79SLois Curfman McInnes int ierr; 381*8965ea79SLois Curfman McInnes if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix"); 382*8965ea79SLois Curfman McInnes ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 383*8965ea79SLois Curfman McInnes ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr); 384*8965ea79SLois Curfman McInnes ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 385*8965ea79SLois Curfman McInnes ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 386*8965ea79SLois Curfman McInnes return 0; 387*8965ea79SLois Curfman McInnes } 388*8965ea79SLois Curfman McInnes 389*8965ea79SLois Curfman McInnes static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 390*8965ea79SLois Curfman McInnes { 391*8965ea79SLois Curfman McInnes Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 392*8965ea79SLois Curfman McInnes int ierr; 393*8965ea79SLois Curfman McInnes 394*8965ea79SLois Curfman McInnes if (!a->assembled) SETERRQ(1,"MatMulTrans_MPIAIJ:must assemble matrix"); 395*8965ea79SLois Curfman McInnes /* do nondiagonal part */ 396*8965ea79SLois Curfman McInnes ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr); 397*8965ea79SLois Curfman McInnes /* send it on its way */ 398*8965ea79SLois Curfman McInnes ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES, 399*8965ea79SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 400*8965ea79SLois Curfman McInnes /* do local part */ 401*8965ea79SLois Curfman McInnes ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr); 402*8965ea79SLois Curfman McInnes /* receive remote parts: note this assumes the values are not actually */ 403*8965ea79SLois Curfman McInnes /* inserted in yy until the next line, which is true for my implementation*/ 404*8965ea79SLois Curfman McInnes /* but is not perhaps always true. */ 405*8965ea79SLois Curfman McInnes ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 406*8965ea79SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 407*8965ea79SLois Curfman McInnes return 0; 408*8965ea79SLois Curfman McInnes } 409*8965ea79SLois Curfman McInnes 410*8965ea79SLois Curfman McInnes static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 411*8965ea79SLois Curfman McInnes { 412*8965ea79SLois Curfman McInnes Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 413*8965ea79SLois Curfman McInnes int ierr; 414*8965ea79SLois Curfman McInnes 415*8965ea79SLois Curfman McInnes if (!a->assembled) SETERRQ(1,"MatMulTransAdd_MPIAIJ:must assemble matrix"); 416*8965ea79SLois Curfman McInnes /* do nondiagonal part */ 417*8965ea79SLois Curfman McInnes ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr); 418*8965ea79SLois Curfman McInnes /* send it on its way */ 419*8965ea79SLois Curfman McInnes ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES, 420*8965ea79SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 421*8965ea79SLois Curfman McInnes /* do local part */ 422*8965ea79SLois Curfman McInnes ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr); 423*8965ea79SLois Curfman McInnes /* receive remote parts: note this assumes the values are not actually */ 424*8965ea79SLois Curfman McInnes /* inserted in yy until the next line, which is true for my implementation*/ 425*8965ea79SLois Curfman McInnes /* but is not perhaps always true. */ 426*8965ea79SLois Curfman McInnes ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 427*8965ea79SLois Curfman McInnes (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 428*8965ea79SLois Curfman McInnes return 0; 429*8965ea79SLois Curfman McInnes } 430*8965ea79SLois Curfman McInnes 431*8965ea79SLois Curfman McInnes /* 432*8965ea79SLois Curfman McInnes This only works correctly for square matrices where the subblock A->A is the 433*8965ea79SLois Curfman McInnes diagonal block 434*8965ea79SLois Curfman McInnes */ 435*8965ea79SLois Curfman McInnes static int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 436*8965ea79SLois Curfman McInnes { 437*8965ea79SLois Curfman McInnes Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 438*8965ea79SLois Curfman McInnes if (!a->assembled) SETERRQ(1,"MatGetDiag_MPIAIJ:must assemble matrix"); 439*8965ea79SLois Curfman McInnes return MatGetDiagonal(a->A,v); 440*8965ea79SLois Curfman McInnes } 441*8965ea79SLois Curfman McInnes 442*8965ea79SLois Curfman McInnes static int MatDestroy_MPIAIJ(PetscObject obj) 443*8965ea79SLois Curfman McInnes { 444*8965ea79SLois Curfman McInnes Mat mat = (Mat) obj; 445*8965ea79SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 446*8965ea79SLois Curfman McInnes int ierr; 447*8965ea79SLois Curfman McInnes #if defined(PETSC_LOG) 448*8965ea79SLois Curfman McInnes PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 449*8965ea79SLois Curfman McInnes #endif 450*8965ea79SLois Curfman McInnes PETSCFREE(aij->rowners); 451*8965ea79SLois Curfman McInnes ierr = MatDestroy(aij->A); CHKERRQ(ierr); 452*8965ea79SLois Curfman McInnes ierr = MatDestroy(aij->B); CHKERRQ(ierr); 453*8965ea79SLois Curfman McInnes if (aij->colmap) PETSCFREE(aij->colmap); 454*8965ea79SLois Curfman McInnes if (aij->garray) PETSCFREE(aij->garray); 455*8965ea79SLois Curfman McInnes if (aij->lvec) VecDestroy(aij->lvec); 456*8965ea79SLois Curfman McInnes if (aij->Mvctx) VecScatterCtxDestroy(aij->Mvctx); 457*8965ea79SLois Curfman McInnes PETSCFREE(aij); 458*8965ea79SLois Curfman McInnes PLogObjectDestroy(mat); 459*8965ea79SLois Curfman McInnes PETSCHEADERDESTROY(mat); 460*8965ea79SLois Curfman McInnes return 0; 461*8965ea79SLois Curfman McInnes } 462*8965ea79SLois Curfman McInnes #include "draw.h" 463*8965ea79SLois Curfman McInnes #include "pinclude/pviewer.h" 464*8965ea79SLois Curfman McInnes 465*8965ea79SLois Curfman McInnes static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 466*8965ea79SLois Curfman McInnes { 467*8965ea79SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 468*8965ea79SLois Curfman McInnes int ierr; 469*8965ea79SLois Curfman McInnes 470*8965ea79SLois Curfman McInnes if (aij->size == 1) { 471*8965ea79SLois Curfman McInnes ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 472*8965ea79SLois Curfman McInnes } 473*8965ea79SLois Curfman McInnes else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported"); 474*8965ea79SLois Curfman McInnes return 0; 475*8965ea79SLois Curfman McInnes } 476*8965ea79SLois Curfman McInnes 477*8965ea79SLois Curfman McInnes static int MatView_MPIAIJ_ASCIIorDraw(Mat mat,Viewer viewer) 478*8965ea79SLois Curfman McInnes { 479*8965ea79SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 480*8965ea79SLois Curfman McInnes Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 481*8965ea79SLois Curfman McInnes int ierr, format,shift = C->indexshift; 482*8965ea79SLois Curfman McInnes PetscObject vobj = (PetscObject) viewer; 483*8965ea79SLois Curfman McInnes FILE *fd; 484*8965ea79SLois Curfman McInnes 485*8965ea79SLois Curfman McInnes if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) { 486*8965ea79SLois Curfman McInnes ierr = ViewerFileGetFormat_Private(viewer,&format); 487*8965ea79SLois Curfman McInnes if (format == FILE_FORMAT_INFO) { 488*8965ea79SLois Curfman McInnes return 0; 489*8965ea79SLois Curfman McInnes } 490*8965ea79SLois Curfman McInnes } 491*8965ea79SLois Curfman McInnes 492*8965ea79SLois Curfman McInnes if (vobj->type == ASCII_FILE_VIEWER) { 493*8965ea79SLois Curfman McInnes ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr); 494*8965ea79SLois Curfman McInnes MPIU_Seq_begin(mat->comm,1); 495*8965ea79SLois Curfman McInnes fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 496*8965ea79SLois Curfman McInnes aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 497*8965ea79SLois Curfman McInnes aij->cend); 498*8965ea79SLois Curfman McInnes ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 499*8965ea79SLois Curfman McInnes ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 500*8965ea79SLois Curfman McInnes fflush(fd); 501*8965ea79SLois Curfman McInnes MPIU_Seq_end(mat->comm,1); 502*8965ea79SLois Curfman McInnes } 503*8965ea79SLois Curfman McInnes else { 504*8965ea79SLois Curfman McInnes int size = aij->size, rank = aij->rank; 505*8965ea79SLois Curfman McInnes if (size == 1) { 506*8965ea79SLois Curfman McInnes ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 507*8965ea79SLois Curfman McInnes } 508*8965ea79SLois Curfman McInnes else { 509*8965ea79SLois Curfman McInnes /* assemble the entire matrix onto first processor. */ 510*8965ea79SLois Curfman McInnes Mat A; 511*8965ea79SLois Curfman McInnes Mat_SeqAIJ *Aloc; 512*8965ea79SLois Curfman McInnes int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 513*8965ea79SLois Curfman McInnes Scalar *a; 514*8965ea79SLois Curfman McInnes 515*8965ea79SLois Curfman McInnes if (!rank) { 516*8965ea79SLois Curfman McInnes ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A);CHKERRQ(ierr); 517*8965ea79SLois Curfman McInnes } 518*8965ea79SLois Curfman McInnes else { 519*8965ea79SLois Curfman McInnes ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A);CHKERRQ(ierr); 520*8965ea79SLois Curfman McInnes } 521*8965ea79SLois Curfman McInnes PLogObjectParent(mat,A); 522*8965ea79SLois Curfman McInnes 523*8965ea79SLois Curfman McInnes 524*8965ea79SLois Curfman McInnes /* copy over the A part */ 525*8965ea79SLois Curfman McInnes Aloc = (Mat_SeqAIJ*) aij->A->data; 526*8965ea79SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 527*8965ea79SLois Curfman McInnes row = aij->rstart; 528*8965ea79SLois Curfman McInnes for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 529*8965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 530*8965ea79SLois Curfman McInnes ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 531*8965ea79SLois Curfman McInnes row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 532*8965ea79SLois Curfman McInnes } 533*8965ea79SLois Curfman McInnes aj = Aloc->j; 534*8965ea79SLois Curfman McInnes for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 535*8965ea79SLois Curfman McInnes 536*8965ea79SLois Curfman McInnes /* copy over the B part */ 537*8965ea79SLois Curfman McInnes Aloc = (Mat_SeqAIJ*) aij->B->data; 538*8965ea79SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 539*8965ea79SLois Curfman McInnes row = aij->rstart; 540*8965ea79SLois Curfman McInnes ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 541*8965ea79SLois Curfman McInnes for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 542*8965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 543*8965ea79SLois Curfman McInnes ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 544*8965ea79SLois Curfman McInnes row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 545*8965ea79SLois Curfman McInnes } 546*8965ea79SLois Curfman McInnes PETSCFREE(ct); 547*8965ea79SLois Curfman McInnes ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 548*8965ea79SLois Curfman McInnes ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 549*8965ea79SLois Curfman McInnes if (!rank) { 550*8965ea79SLois Curfman McInnes ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 551*8965ea79SLois Curfman McInnes } 552*8965ea79SLois Curfman McInnes ierr = MatDestroy(A); CHKERRQ(ierr); 553*8965ea79SLois Curfman McInnes } 554*8965ea79SLois Curfman McInnes } 555*8965ea79SLois Curfman McInnes return 0; 556*8965ea79SLois Curfman McInnes } 557*8965ea79SLois Curfman McInnes 558*8965ea79SLois Curfman McInnes static int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 559*8965ea79SLois Curfman McInnes { 560*8965ea79SLois Curfman McInnes Mat mat = (Mat) obj; 561*8965ea79SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 562*8965ea79SLois Curfman McInnes int ierr; 563*8965ea79SLois Curfman McInnes PetscObject vobj = (PetscObject) viewer; 564*8965ea79SLois Curfman McInnes 565*8965ea79SLois Curfman McInnes if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ:must assemble matrix"); 566*8965ea79SLois Curfman McInnes if (!viewer) { 567*8965ea79SLois Curfman McInnes viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer; 568*8965ea79SLois Curfman McInnes } 569*8965ea79SLois Curfman McInnes if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0; 570*8965ea79SLois Curfman McInnes else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) { 571*8965ea79SLois Curfman McInnes ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr); 572*8965ea79SLois Curfman McInnes } 573*8965ea79SLois Curfman McInnes else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) { 574*8965ea79SLois Curfman McInnes ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr); 575*8965ea79SLois Curfman McInnes } 576*8965ea79SLois Curfman McInnes else if (vobj->cookie == DRAW_COOKIE) { 577*8965ea79SLois Curfman McInnes ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr); 578*8965ea79SLois Curfman McInnes } 579*8965ea79SLois Curfman McInnes else if (vobj->type == BINARY_FILE_VIEWER) { 580*8965ea79SLois Curfman McInnes return MatView_MPIAIJ_Binary(mat,viewer); 581*8965ea79SLois Curfman McInnes } 582*8965ea79SLois Curfman McInnes return 0; 583*8965ea79SLois Curfman McInnes } 584*8965ea79SLois Curfman McInnes 585*8965ea79SLois Curfman McInnes extern int MatMarkDiag_SeqAIJ(Mat); 586*8965ea79SLois Curfman McInnes /* 587*8965ea79SLois Curfman McInnes This has to provide several versions. 588*8965ea79SLois Curfman McInnes 589*8965ea79SLois Curfman McInnes 1) per sequential 590*8965ea79SLois Curfman McInnes 2) a) use only local smoothing updating outer values only once. 591*8965ea79SLois Curfman McInnes b) local smoothing updating outer values each inner iteration 592*8965ea79SLois Curfman McInnes 3) color updating out values betwen colors. 593*8965ea79SLois Curfman McInnes */ 594*8965ea79SLois Curfman McInnes static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 595*8965ea79SLois Curfman McInnes double fshift,int its,Vec xx) 596*8965ea79SLois Curfman McInnes { 597*8965ea79SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 598*8965ea79SLois Curfman McInnes Mat AA = mat->A, BB = mat->B; 599*8965ea79SLois Curfman McInnes Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 600*8965ea79SLois Curfman McInnes Scalar zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts; 601*8965ea79SLois Curfman McInnes int ierr,*idx, *diag; 602*8965ea79SLois Curfman McInnes int n = mat->n, m = mat->m, i,shift = A->indexshift; 603*8965ea79SLois Curfman McInnes Vec tt; 604*8965ea79SLois Curfman McInnes 605*8965ea79SLois Curfman McInnes if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ:must assemble matrix"); 606*8965ea79SLois Curfman McInnes 607*8965ea79SLois Curfman McInnes VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 608*8965ea79SLois Curfman McInnes xs = x + shift; /* shift by one for index start of 1 */ 609*8965ea79SLois Curfman McInnes ls = ls + shift; 610*8965ea79SLois Curfman McInnes if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;} 611*8965ea79SLois Curfman McInnes diag = A->diag; 612*8965ea79SLois Curfman McInnes if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) { 613*8965ea79SLois Curfman McInnes SETERRQ(1,"MatRelax_MPIAIJ:Option not supported"); 614*8965ea79SLois Curfman McInnes } 615*8965ea79SLois Curfman McInnes if (flag & SOR_EISENSTAT) { 616*8965ea79SLois Curfman McInnes /* Let A = L + U + D; where L is lower trianglar, 617*8965ea79SLois Curfman McInnes U is upper triangular, E is diagonal; This routine applies 618*8965ea79SLois Curfman McInnes 619*8965ea79SLois Curfman McInnes (L + E)^{-1} A (U + E)^{-1} 620*8965ea79SLois Curfman McInnes 621*8965ea79SLois Curfman McInnes to a vector efficiently using Eisenstat's trick. This is for 622*8965ea79SLois Curfman McInnes the case of SSOR preconditioner, so E is D/omega where omega 623*8965ea79SLois Curfman McInnes is the relaxation factor. 624*8965ea79SLois Curfman McInnes */ 625*8965ea79SLois Curfman McInnes ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr); 626*8965ea79SLois Curfman McInnes PLogObjectParent(matin,tt); 627*8965ea79SLois Curfman McInnes VecGetArray(tt,&t); 628*8965ea79SLois Curfman McInnes scale = (2.0/omega) - 1.0; 629*8965ea79SLois Curfman McInnes /* x = (E + U)^{-1} b */ 630*8965ea79SLois Curfman McInnes VecSet(&zero,mat->lvec); 631*8965ea79SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 632*8965ea79SLois Curfman McInnes mat->Mvctx); CHKERRQ(ierr); 633*8965ea79SLois Curfman McInnes for ( i=m-1; i>-1; i-- ) { 634*8965ea79SLois Curfman McInnes n = A->i[i+1] - diag[i] - 1; 635*8965ea79SLois Curfman McInnes idx = A->j + diag[i] + !shift; 636*8965ea79SLois Curfman McInnes v = A->a + diag[i] + !shift; 637*8965ea79SLois Curfman McInnes sum = b[i]; 638*8965ea79SLois Curfman McInnes SPARSEDENSEMDOT(sum,xs,v,idx,n); 639*8965ea79SLois Curfman McInnes d = fshift + A->a[diag[i]+shift]; 640*8965ea79SLois Curfman McInnes n = B->i[i+1] - B->i[i]; 641*8965ea79SLois Curfman McInnes idx = B->j + B->i[i] + shift; 642*8965ea79SLois Curfman McInnes v = B->a + B->i[i] + shift; 643*8965ea79SLois Curfman McInnes SPARSEDENSEMDOT(sum,ls,v,idx,n); 644*8965ea79SLois Curfman McInnes x[i] = omega*(sum/d); 645*8965ea79SLois Curfman McInnes } 646*8965ea79SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 647*8965ea79SLois Curfman McInnes mat->Mvctx); CHKERRQ(ierr); 648*8965ea79SLois Curfman McInnes 649*8965ea79SLois Curfman McInnes /* t = b - (2*E - D)x */ 650*8965ea79SLois Curfman McInnes v = A->a; 651*8965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; } 652*8965ea79SLois Curfman McInnes 653*8965ea79SLois Curfman McInnes /* t = (E + L)^{-1}t */ 654*8965ea79SLois Curfman McInnes ts = t + shift; /* shifted by one for index start of a or mat->j*/ 655*8965ea79SLois Curfman McInnes diag = A->diag; 656*8965ea79SLois Curfman McInnes VecSet(&zero,mat->lvec); 657*8965ea79SLois Curfman McInnes ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 658*8965ea79SLois Curfman McInnes mat->Mvctx); CHKERRQ(ierr); 659*8965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 660*8965ea79SLois Curfman McInnes n = diag[i] - A->i[i]; 661*8965ea79SLois Curfman McInnes idx = A->j + A->i[i] + shift; 662*8965ea79SLois Curfman McInnes v = A->a + A->i[i] + shift; 663*8965ea79SLois Curfman McInnes sum = t[i]; 664*8965ea79SLois Curfman McInnes SPARSEDENSEMDOT(sum,ts,v,idx,n); 665*8965ea79SLois Curfman McInnes d = fshift + A->a[diag[i]+shift]; 666*8965ea79SLois Curfman McInnes n = B->i[i+1] - B->i[i]; 667*8965ea79SLois Curfman McInnes idx = B->j + B->i[i] + shift; 668*8965ea79SLois Curfman McInnes v = B->a + B->i[i] + shift; 669*8965ea79SLois Curfman McInnes SPARSEDENSEMDOT(sum,ls,v,idx,n); 670*8965ea79SLois Curfman McInnes t[i] = omega*(sum/d); 671*8965ea79SLois Curfman McInnes } 672*8965ea79SLois Curfman McInnes ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 673*8965ea79SLois Curfman McInnes mat->Mvctx); CHKERRQ(ierr); 674*8965ea79SLois Curfman McInnes /* x = x + t */ 675*8965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { x[i] += t[i]; } 676*8965ea79SLois Curfman McInnes VecDestroy(tt); 677*8965ea79SLois Curfman McInnes return 0; 678*8965ea79SLois Curfman McInnes } 679*8965ea79SLois Curfman McInnes 680*8965ea79SLois Curfman McInnes 681*8965ea79SLois Curfman McInnes if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){ 682*8965ea79SLois Curfman McInnes if (flag & SOR_ZERO_INITIAL_GUESS) { 683*8965ea79SLois Curfman McInnes VecSet(&zero,mat->lvec); VecSet(&zero,xx); 684*8965ea79SLois Curfman McInnes } 685*8965ea79SLois Curfman McInnes else { 686*8965ea79SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 687*8965ea79SLois Curfman McInnes CHKERRQ(ierr); 688*8965ea79SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 689*8965ea79SLois Curfman McInnes CHKERRQ(ierr); 690*8965ea79SLois Curfman McInnes } 691*8965ea79SLois Curfman McInnes while (its--) { 692*8965ea79SLois Curfman McInnes /* go down through the rows */ 693*8965ea79SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 694*8965ea79SLois Curfman McInnes mat->Mvctx); CHKERRQ(ierr); 695*8965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 696*8965ea79SLois Curfman McInnes n = A->i[i+1] - A->i[i]; 697*8965ea79SLois Curfman McInnes idx = A->j + A->i[i] + shift; 698*8965ea79SLois Curfman McInnes v = A->a + A->i[i] + shift; 699*8965ea79SLois Curfman McInnes sum = b[i]; 700*8965ea79SLois Curfman McInnes SPARSEDENSEMDOT(sum,xs,v,idx,n); 701*8965ea79SLois Curfman McInnes d = fshift + A->a[diag[i]+shift]; 702*8965ea79SLois Curfman McInnes n = B->i[i+1] - B->i[i]; 703*8965ea79SLois Curfman McInnes idx = B->j + B->i[i] + shift; 704*8965ea79SLois Curfman McInnes v = B->a + B->i[i] + shift; 705*8965ea79SLois Curfman McInnes SPARSEDENSEMDOT(sum,ls,v,idx,n); 706*8965ea79SLois Curfman McInnes x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 707*8965ea79SLois Curfman McInnes } 708*8965ea79SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 709*8965ea79SLois Curfman McInnes mat->Mvctx); CHKERRQ(ierr); 710*8965ea79SLois Curfman McInnes /* come up through the rows */ 711*8965ea79SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 712*8965ea79SLois Curfman McInnes mat->Mvctx); CHKERRQ(ierr); 713*8965ea79SLois Curfman McInnes for ( i=m-1; i>-1; i-- ) { 714*8965ea79SLois Curfman McInnes n = A->i[i+1] - A->i[i]; 715*8965ea79SLois Curfman McInnes idx = A->j + A->i[i] + shift; 716*8965ea79SLois Curfman McInnes v = A->a + A->i[i] + shift; 717*8965ea79SLois Curfman McInnes sum = b[i]; 718*8965ea79SLois Curfman McInnes SPARSEDENSEMDOT(sum,xs,v,idx,n); 719*8965ea79SLois Curfman McInnes d = fshift + A->a[diag[i]+shift]; 720*8965ea79SLois Curfman McInnes n = B->i[i+1] - B->i[i]; 721*8965ea79SLois Curfman McInnes idx = B->j + B->i[i] + shift; 722*8965ea79SLois Curfman McInnes v = B->a + B->i[i] + shift; 723*8965ea79SLois Curfman McInnes SPARSEDENSEMDOT(sum,ls,v,idx,n); 724*8965ea79SLois Curfman McInnes x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 725*8965ea79SLois Curfman McInnes } 726*8965ea79SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 727*8965ea79SLois Curfman McInnes mat->Mvctx); CHKERRQ(ierr); 728*8965ea79SLois Curfman McInnes } 729*8965ea79SLois Curfman McInnes } 730*8965ea79SLois Curfman McInnes else if (flag & SOR_FORWARD_SWEEP){ 731*8965ea79SLois Curfman McInnes if (flag & SOR_ZERO_INITIAL_GUESS) { 732*8965ea79SLois Curfman McInnes VecSet(&zero,mat->lvec); 733*8965ea79SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 734*8965ea79SLois Curfman McInnes mat->Mvctx); CHKERRQ(ierr); 735*8965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 736*8965ea79SLois Curfman McInnes n = diag[i] - A->i[i]; 737*8965ea79SLois Curfman McInnes idx = A->j + A->i[i] + shift; 738*8965ea79SLois Curfman McInnes v = A->a + A->i[i] + shift; 739*8965ea79SLois Curfman McInnes sum = b[i]; 740*8965ea79SLois Curfman McInnes SPARSEDENSEMDOT(sum,xs,v,idx,n); 741*8965ea79SLois Curfman McInnes d = fshift + A->a[diag[i]+shift]; 742*8965ea79SLois Curfman McInnes n = B->i[i+1] - B->i[i]; 743*8965ea79SLois Curfman McInnes idx = B->j + B->i[i] + shift; 744*8965ea79SLois Curfman McInnes v = B->a + B->i[i] + shift; 745*8965ea79SLois Curfman McInnes SPARSEDENSEMDOT(sum,ls,v,idx,n); 746*8965ea79SLois Curfman McInnes x[i] = omega*(sum/d); 747*8965ea79SLois Curfman McInnes } 748*8965ea79SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 749*8965ea79SLois Curfman McInnes mat->Mvctx); CHKERRQ(ierr); 750*8965ea79SLois Curfman McInnes its--; 751*8965ea79SLois Curfman McInnes } 752*8965ea79SLois Curfman McInnes while (its--) { 753*8965ea79SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 754*8965ea79SLois Curfman McInnes CHKERRQ(ierr); 755*8965ea79SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx); 756*8965ea79SLois Curfman McInnes CHKERRQ(ierr); 757*8965ea79SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 758*8965ea79SLois Curfman McInnes mat->Mvctx); CHKERRQ(ierr); 759*8965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 760*8965ea79SLois Curfman McInnes n = A->i[i+1] - A->i[i]; 761*8965ea79SLois Curfman McInnes idx = A->j + A->i[i] + shift; 762*8965ea79SLois Curfman McInnes v = A->a + A->i[i] + shift; 763*8965ea79SLois Curfman McInnes sum = b[i]; 764*8965ea79SLois Curfman McInnes SPARSEDENSEMDOT(sum,xs,v,idx,n); 765*8965ea79SLois Curfman McInnes d = fshift + A->a[diag[i]+shift]; 766*8965ea79SLois Curfman McInnes n = B->i[i+1] - B->i[i]; 767*8965ea79SLois Curfman McInnes idx = B->j + B->i[i] + shift; 768*8965ea79SLois Curfman McInnes v = B->a + B->i[i] + shift; 769*8965ea79SLois Curfman McInnes SPARSEDENSEMDOT(sum,ls,v,idx,n); 770*8965ea79SLois Curfman McInnes x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 771*8965ea79SLois Curfman McInnes } 772*8965ea79SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN, 773*8965ea79SLois Curfman McInnes mat->Mvctx); CHKERRQ(ierr); 774*8965ea79SLois Curfman McInnes } 775*8965ea79SLois Curfman McInnes } 776*8965ea79SLois Curfman McInnes else if (flag & SOR_BACKWARD_SWEEP){ 777*8965ea79SLois Curfman McInnes if (flag & SOR_ZERO_INITIAL_GUESS) { 778*8965ea79SLois Curfman McInnes VecSet(&zero,mat->lvec); 779*8965ea79SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 780*8965ea79SLois Curfman McInnes mat->Mvctx); CHKERRQ(ierr); 781*8965ea79SLois Curfman McInnes for ( i=m-1; i>-1; i-- ) { 782*8965ea79SLois Curfman McInnes n = A->i[i+1] - diag[i] - 1; 783*8965ea79SLois Curfman McInnes idx = A->j + diag[i] + !shift; 784*8965ea79SLois Curfman McInnes v = A->a + diag[i] + !shift; 785*8965ea79SLois Curfman McInnes sum = b[i]; 786*8965ea79SLois Curfman McInnes SPARSEDENSEMDOT(sum,xs,v,idx,n); 787*8965ea79SLois Curfman McInnes d = fshift + A->a[diag[i]+shift]; 788*8965ea79SLois Curfman McInnes n = B->i[i+1] - B->i[i]; 789*8965ea79SLois Curfman McInnes idx = B->j + B->i[i] + shift; 790*8965ea79SLois Curfman McInnes v = B->a + B->i[i] + shift; 791*8965ea79SLois Curfman McInnes SPARSEDENSEMDOT(sum,ls,v,idx,n); 792*8965ea79SLois Curfman McInnes x[i] = omega*(sum/d); 793*8965ea79SLois Curfman McInnes } 794*8965ea79SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 795*8965ea79SLois Curfman McInnes mat->Mvctx); CHKERRQ(ierr); 796*8965ea79SLois Curfman McInnes its--; 797*8965ea79SLois Curfman McInnes } 798*8965ea79SLois Curfman McInnes while (its--) { 799*8965ea79SLois Curfman McInnes ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 800*8965ea79SLois Curfman McInnes mat->Mvctx); CHKERRQ(ierr); 801*8965ea79SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN, 802*8965ea79SLois Curfman McInnes mat->Mvctx); CHKERRQ(ierr); 803*8965ea79SLois Curfman McInnes ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 804*8965ea79SLois Curfman McInnes mat->Mvctx); CHKERRQ(ierr); 805*8965ea79SLois Curfman McInnes for ( i=m-1; i>-1; i-- ) { 806*8965ea79SLois Curfman McInnes n = A->i[i+1] - A->i[i]; 807*8965ea79SLois Curfman McInnes idx = A->j + A->i[i] + shift; 808*8965ea79SLois Curfman McInnes v = A->a + A->i[i] + shift; 809*8965ea79SLois Curfman McInnes sum = b[i]; 810*8965ea79SLois Curfman McInnes SPARSEDENSEMDOT(sum,xs,v,idx,n); 811*8965ea79SLois Curfman McInnes d = fshift + A->a[diag[i]+shift]; 812*8965ea79SLois Curfman McInnes n = B->i[i+1] - B->i[i]; 813*8965ea79SLois Curfman McInnes idx = B->j + B->i[i] + shift; 814*8965ea79SLois Curfman McInnes v = B->a + B->i[i] + shift; 815*8965ea79SLois Curfman McInnes SPARSEDENSEMDOT(sum,ls,v,idx,n); 816*8965ea79SLois Curfman McInnes x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 817*8965ea79SLois Curfman McInnes } 818*8965ea79SLois Curfman McInnes ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP, 819*8965ea79SLois Curfman McInnes mat->Mvctx); CHKERRQ(ierr); 820*8965ea79SLois Curfman McInnes } 821*8965ea79SLois Curfman McInnes } 822*8965ea79SLois Curfman McInnes else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 823*8965ea79SLois Curfman McInnes if (flag & SOR_ZERO_INITIAL_GUESS) { 824*8965ea79SLois Curfman McInnes return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 825*8965ea79SLois Curfman McInnes } 826*8965ea79SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 827*8965ea79SLois Curfman McInnes CHKERRQ(ierr); 828*8965ea79SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 829*8965ea79SLois Curfman McInnes CHKERRQ(ierr); 830*8965ea79SLois Curfman McInnes while (its--) { 831*8965ea79SLois Curfman McInnes /* go down through the rows */ 832*8965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 833*8965ea79SLois Curfman McInnes n = A->i[i+1] - A->i[i]; 834*8965ea79SLois Curfman McInnes idx = A->j + A->i[i] + shift; 835*8965ea79SLois Curfman McInnes v = A->a + A->i[i] + shift; 836*8965ea79SLois Curfman McInnes sum = b[i]; 837*8965ea79SLois Curfman McInnes SPARSEDENSEMDOT(sum,xs,v,idx,n); 838*8965ea79SLois Curfman McInnes d = fshift + A->a[diag[i]+shift]; 839*8965ea79SLois Curfman McInnes n = B->i[i+1] - B->i[i]; 840*8965ea79SLois Curfman McInnes idx = B->j + B->i[i] + shift; 841*8965ea79SLois Curfman McInnes v = B->a + B->i[i] + shift; 842*8965ea79SLois Curfman McInnes SPARSEDENSEMDOT(sum,ls,v,idx,n); 843*8965ea79SLois Curfman McInnes x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 844*8965ea79SLois Curfman McInnes } 845*8965ea79SLois Curfman McInnes /* come up through the rows */ 846*8965ea79SLois Curfman McInnes for ( i=m-1; i>-1; i-- ) { 847*8965ea79SLois Curfman McInnes n = A->i[i+1] - A->i[i]; 848*8965ea79SLois Curfman McInnes idx = A->j + A->i[i] + shift; 849*8965ea79SLois Curfman McInnes v = A->a + A->i[i] + shift; 850*8965ea79SLois Curfman McInnes sum = b[i]; 851*8965ea79SLois Curfman McInnes SPARSEDENSEMDOT(sum,xs,v,idx,n); 852*8965ea79SLois Curfman McInnes d = fshift + A->a[diag[i]+shift]; 853*8965ea79SLois Curfman McInnes n = B->i[i+1] - B->i[i]; 854*8965ea79SLois Curfman McInnes idx = B->j + B->i[i] + shift; 855*8965ea79SLois Curfman McInnes v = B->a + B->i[i] + shift; 856*8965ea79SLois Curfman McInnes SPARSEDENSEMDOT(sum,ls,v,idx,n); 857*8965ea79SLois Curfman McInnes x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 858*8965ea79SLois Curfman McInnes } 859*8965ea79SLois Curfman McInnes } 860*8965ea79SLois Curfman McInnes } 861*8965ea79SLois Curfman McInnes else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 862*8965ea79SLois Curfman McInnes if (flag & SOR_ZERO_INITIAL_GUESS) { 863*8965ea79SLois Curfman McInnes return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 864*8965ea79SLois Curfman McInnes } 865*8965ea79SLois Curfman McInnes ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 866*8965ea79SLois Curfman McInnes CHKERRQ(ierr); 867*8965ea79SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx); 868*8965ea79SLois Curfman McInnes CHKERRQ(ierr); 869*8965ea79SLois Curfman McInnes while (its--) { 870*8965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 871*8965ea79SLois Curfman McInnes n = A->i[i+1] - A->i[i]; 872*8965ea79SLois Curfman McInnes idx = A->j + A->i[i] + shift; 873*8965ea79SLois Curfman McInnes v = A->a + A->i[i] + shift; 874*8965ea79SLois Curfman McInnes sum = b[i]; 875*8965ea79SLois Curfman McInnes SPARSEDENSEMDOT(sum,xs,v,idx,n); 876*8965ea79SLois Curfman McInnes d = fshift + A->a[diag[i]+shift]; 877*8965ea79SLois Curfman McInnes n = B->i[i+1] - B->i[i]; 878*8965ea79SLois Curfman McInnes idx = B->j + B->i[i] + shift; 879*8965ea79SLois Curfman McInnes v = B->a + B->i[i] + shift; 880*8965ea79SLois Curfman McInnes SPARSEDENSEMDOT(sum,ls,v,idx,n); 881*8965ea79SLois Curfman McInnes x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 882*8965ea79SLois Curfman McInnes } 883*8965ea79SLois Curfman McInnes } 884*8965ea79SLois Curfman McInnes } 885*8965ea79SLois Curfman McInnes else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 886*8965ea79SLois Curfman McInnes if (flag & SOR_ZERO_INITIAL_GUESS) { 887*8965ea79SLois Curfman McInnes return MatRelax(mat->A,bb,omega,flag,fshift,its,xx); 888*8965ea79SLois Curfman McInnes } 889*8965ea79SLois Curfman McInnes ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 890*8965ea79SLois Curfman McInnes mat->Mvctx); CHKERRQ(ierr); 891*8965ea79SLois Curfman McInnes ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL, 892*8965ea79SLois Curfman McInnes mat->Mvctx); CHKERRQ(ierr); 893*8965ea79SLois Curfman McInnes while (its--) { 894*8965ea79SLois Curfman McInnes for ( i=m-1; i>-1; i-- ) { 895*8965ea79SLois Curfman McInnes n = A->i[i+1] - A->i[i]; 896*8965ea79SLois Curfman McInnes idx = A->j + A->i[i] + shift; 897*8965ea79SLois Curfman McInnes v = A->a + A->i[i] + shift; 898*8965ea79SLois Curfman McInnes sum = b[i]; 899*8965ea79SLois Curfman McInnes SPARSEDENSEMDOT(sum,xs,v,idx,n); 900*8965ea79SLois Curfman McInnes d = fshift + A->a[diag[i]+shift]; 901*8965ea79SLois Curfman McInnes n = B->i[i+1] - B->i[i]; 902*8965ea79SLois Curfman McInnes idx = B->j + B->i[i] + shift; 903*8965ea79SLois Curfman McInnes v = B->a + B->i[i] + shift; 904*8965ea79SLois Curfman McInnes SPARSEDENSEMDOT(sum,ls,v,idx,n); 905*8965ea79SLois Curfman McInnes x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]); 906*8965ea79SLois Curfman McInnes } 907*8965ea79SLois Curfman McInnes } 908*8965ea79SLois Curfman McInnes } 909*8965ea79SLois Curfman McInnes return 0; 910*8965ea79SLois Curfman McInnes } 911*8965ea79SLois Curfman McInnes 912*8965ea79SLois Curfman McInnes static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz, 913*8965ea79SLois Curfman McInnes int *nzalloc,int *mem) 914*8965ea79SLois Curfman McInnes { 915*8965ea79SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 916*8965ea79SLois Curfman McInnes Mat A = mat->A, B = mat->B; 917*8965ea79SLois Curfman McInnes int ierr, isend[3], irecv[3], nzA, nzallocA, memA; 918*8965ea79SLois Curfman McInnes 919*8965ea79SLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr); 920*8965ea79SLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr); 921*8965ea79SLois Curfman McInnes isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA; 922*8965ea79SLois Curfman McInnes if (flag == MAT_LOCAL) { 923*8965ea79SLois Curfman McInnes *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2]; 924*8965ea79SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 925*8965ea79SLois Curfman McInnes MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm); 926*8965ea79SLois Curfman McInnes *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 927*8965ea79SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 928*8965ea79SLois Curfman McInnes MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm); 929*8965ea79SLois Curfman McInnes *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2]; 930*8965ea79SLois Curfman McInnes } 931*8965ea79SLois Curfman McInnes return 0; 932*8965ea79SLois Curfman McInnes } 933*8965ea79SLois Curfman McInnes 934*8965ea79SLois Curfman McInnes extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*); 935*8965ea79SLois Curfman McInnes extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*); 936*8965ea79SLois Curfman McInnes extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double); 937*8965ea79SLois Curfman McInnes extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *); 938*8965ea79SLois Curfman McInnes extern int MatSolve_MPIAIJ(Mat,Vec,Vec); 939*8965ea79SLois Curfman McInnes extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec); 940*8965ea79SLois Curfman McInnes extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec); 941*8965ea79SLois Curfman McInnes extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec); 942*8965ea79SLois Curfman McInnes 943*8965ea79SLois Curfman McInnes static int MatSetOption_MPIAIJ(Mat A,MatOption op) 944*8965ea79SLois Curfman McInnes { 945*8965ea79SLois Curfman McInnes Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 946*8965ea79SLois Curfman McInnes 947*8965ea79SLois Curfman McInnes if (op == NO_NEW_NONZERO_LOCATIONS || 948*8965ea79SLois Curfman McInnes op == YES_NEW_NONZERO_LOCATIONS || 949*8965ea79SLois Curfman McInnes op == COLUMNS_SORTED || 950*8965ea79SLois Curfman McInnes op == ROW_ORIENTED) { 951*8965ea79SLois Curfman McInnes MatSetOption(a->A,op); 952*8965ea79SLois Curfman McInnes MatSetOption(a->B,op); 953*8965ea79SLois Curfman McInnes } 954*8965ea79SLois Curfman McInnes else if (op == ROWS_SORTED || 955*8965ea79SLois Curfman McInnes op == SYMMETRIC_MATRIX || 956*8965ea79SLois Curfman McInnes op == STRUCTURALLY_SYMMETRIC_MATRIX || 957*8965ea79SLois Curfman McInnes op == YES_NEW_DIAGONALS) 958*8965ea79SLois Curfman McInnes PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 959*8965ea79SLois Curfman McInnes else if (op == COLUMN_ORIENTED) 960*8965ea79SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:COLUMN_ORIENTED");} 961*8965ea79SLois Curfman McInnes else if (op == NO_NEW_DIAGONALS) 962*8965ea79SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");} 963*8965ea79SLois Curfman McInnes else 964*8965ea79SLois Curfman McInnes {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");} 965*8965ea79SLois Curfman McInnes return 0; 966*8965ea79SLois Curfman McInnes } 967*8965ea79SLois Curfman McInnes 968*8965ea79SLois Curfman McInnes static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 969*8965ea79SLois Curfman McInnes { 970*8965ea79SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 971*8965ea79SLois Curfman McInnes *m = mat->M; *n = mat->N; 972*8965ea79SLois Curfman McInnes return 0; 973*8965ea79SLois Curfman McInnes } 974*8965ea79SLois Curfman McInnes 975*8965ea79SLois Curfman McInnes static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 976*8965ea79SLois Curfman McInnes { 977*8965ea79SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 978*8965ea79SLois Curfman McInnes *m = mat->m; *n = mat->N; 979*8965ea79SLois Curfman McInnes return 0; 980*8965ea79SLois Curfman McInnes } 981*8965ea79SLois Curfman McInnes 982*8965ea79SLois Curfman McInnes static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 983*8965ea79SLois Curfman McInnes { 984*8965ea79SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 985*8965ea79SLois Curfman McInnes *m = mat->rstart; *n = mat->rend; 986*8965ea79SLois Curfman McInnes return 0; 987*8965ea79SLois Curfman McInnes } 988*8965ea79SLois Curfman McInnes 989*8965ea79SLois Curfman McInnes static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 990*8965ea79SLois Curfman McInnes { 991*8965ea79SLois Curfman McInnes Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 992*8965ea79SLois Curfman McInnes Scalar *vworkA, *vworkB, **pvA, **pvB; 993*8965ea79SLois Curfman McInnes int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 994*8965ea79SLois Curfman McInnes int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 995*8965ea79SLois Curfman McInnes 996*8965ea79SLois Curfman McInnes if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:only local rows") 997*8965ea79SLois Curfman McInnes lrow = row - rstart; 998*8965ea79SLois Curfman McInnes 999*8965ea79SLois Curfman McInnes pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1000*8965ea79SLois Curfman McInnes if (!v) {pvA = 0; pvB = 0;} 1001*8965ea79SLois Curfman McInnes if (!idx) {pcA = 0; if (!v) pcB = 0;} 1002*8965ea79SLois Curfman McInnes ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1003*8965ea79SLois Curfman McInnes ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1004*8965ea79SLois Curfman McInnes nztot = nzA + nzB; 1005*8965ea79SLois Curfman McInnes 1006*8965ea79SLois Curfman McInnes if (v || idx) { 1007*8965ea79SLois Curfman McInnes if (nztot) { 1008*8965ea79SLois Curfman McInnes /* Sort by increasing column numbers, assuming A and B already sorted */ 1009*8965ea79SLois Curfman McInnes int imark; 1010*8965ea79SLois Curfman McInnes if (mat->assembled) { 1011*8965ea79SLois Curfman McInnes for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]]; 1012*8965ea79SLois Curfman McInnes } 1013*8965ea79SLois Curfman McInnes if (v) { 1014*8965ea79SLois Curfman McInnes *v = (Scalar *) PETSCMALLOC( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v); 1015*8965ea79SLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 1016*8965ea79SLois Curfman McInnes if (cworkB[i] < cstart) (*v)[i] = vworkB[i]; 1017*8965ea79SLois Curfman McInnes else break; 1018*8965ea79SLois Curfman McInnes } 1019*8965ea79SLois Curfman McInnes imark = i; 1020*8965ea79SLois Curfman McInnes for ( i=0; i<nzA; i++ ) (*v)[imark+i] = vworkA[i]; 1021*8965ea79SLois Curfman McInnes for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i]; 1022*8965ea79SLois Curfman McInnes } 1023*8965ea79SLois Curfman McInnes if (idx) { 1024*8965ea79SLois Curfman McInnes *idx = (int *) PETSCMALLOC( (nztot)*sizeof(int) ); CHKPTRQ(*idx); 1025*8965ea79SLois Curfman McInnes for (i=0; i<nzA; i++) cworkA[i] += cstart; 1026*8965ea79SLois Curfman McInnes for ( i=0; i<nzB; i++ ) { 1027*8965ea79SLois Curfman McInnes if (cworkB[i] < cstart) (*idx)[i] = cworkB[i]; 1028*8965ea79SLois Curfman McInnes else break; 1029*8965ea79SLois Curfman McInnes } 1030*8965ea79SLois Curfman McInnes imark = i; 1031*8965ea79SLois Curfman McInnes for ( i=0; i<nzA; i++ ) (*idx)[imark+i] = cworkA[i]; 1032*8965ea79SLois Curfman McInnes for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i]; 1033*8965ea79SLois Curfman McInnes } 1034*8965ea79SLois Curfman McInnes } 1035*8965ea79SLois Curfman McInnes else {*idx = 0; *v=0;} 1036*8965ea79SLois Curfman McInnes } 1037*8965ea79SLois Curfman McInnes *nz = nztot; 1038*8965ea79SLois Curfman McInnes ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1039*8965ea79SLois Curfman McInnes ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1040*8965ea79SLois Curfman McInnes return 0; 1041*8965ea79SLois Curfman McInnes } 1042*8965ea79SLois Curfman McInnes 1043*8965ea79SLois Curfman McInnes static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1044*8965ea79SLois Curfman McInnes { 1045*8965ea79SLois Curfman McInnes if (idx) PETSCFREE(*idx); 1046*8965ea79SLois Curfman McInnes if (v) PETSCFREE(*v); 1047*8965ea79SLois Curfman McInnes return 0; 1048*8965ea79SLois Curfman McInnes } 1049*8965ea79SLois Curfman McInnes 1050*8965ea79SLois Curfman McInnes static int MatNorm_MPIAIJ(Mat mat,MatNormType type,double *norm) 1051*8965ea79SLois Curfman McInnes { 1052*8965ea79SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1053*8965ea79SLois Curfman McInnes Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1054*8965ea79SLois Curfman McInnes int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1055*8965ea79SLois Curfman McInnes double sum = 0.0; 1056*8965ea79SLois Curfman McInnes Scalar *v; 1057*8965ea79SLois Curfman McInnes 1058*8965ea79SLois Curfman McInnes if (!aij->assembled) SETERRQ(1,"MatNorm_MPIAIJ:Must assemble matrix"); 1059*8965ea79SLois Curfman McInnes if (aij->size == 1) { 1060*8965ea79SLois Curfman McInnes ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1061*8965ea79SLois Curfman McInnes } else { 1062*8965ea79SLois Curfman McInnes if (type == NORM_FROBENIUS) { 1063*8965ea79SLois Curfman McInnes v = amat->a; 1064*8965ea79SLois Curfman McInnes for (i=0; i<amat->nz; i++ ) { 1065*8965ea79SLois Curfman McInnes #if defined(PETSC_COMPLEX) 1066*8965ea79SLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 1067*8965ea79SLois Curfman McInnes #else 1068*8965ea79SLois Curfman McInnes sum += (*v)*(*v); v++; 1069*8965ea79SLois Curfman McInnes #endif 1070*8965ea79SLois Curfman McInnes } 1071*8965ea79SLois Curfman McInnes v = bmat->a; 1072*8965ea79SLois Curfman McInnes for (i=0; i<bmat->nz; i++ ) { 1073*8965ea79SLois Curfman McInnes #if defined(PETSC_COMPLEX) 1074*8965ea79SLois Curfman McInnes sum += real(conj(*v)*(*v)); v++; 1075*8965ea79SLois Curfman McInnes #else 1076*8965ea79SLois Curfman McInnes sum += (*v)*(*v); v++; 1077*8965ea79SLois Curfman McInnes #endif 1078*8965ea79SLois Curfman McInnes } 1079*8965ea79SLois Curfman McInnes MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 1080*8965ea79SLois Curfman McInnes *norm = sqrt(*norm); 1081*8965ea79SLois Curfman McInnes } 1082*8965ea79SLois Curfman McInnes else if (type == NORM_1) { /* max column norm */ 1083*8965ea79SLois Curfman McInnes double *tmp, *tmp2; 1084*8965ea79SLois Curfman McInnes int *jj, *garray = aij->garray; 1085*8965ea79SLois Curfman McInnes tmp = (double *) PETSCMALLOC( aij->N*sizeof(double) ); CHKPTRQ(tmp); 1086*8965ea79SLois Curfman McInnes tmp2 = (double *) PETSCMALLOC( aij->N*sizeof(double) ); CHKPTRQ(tmp2); 1087*8965ea79SLois Curfman McInnes PetscZero(tmp,aij->N*sizeof(double)); 1088*8965ea79SLois Curfman McInnes *norm = 0.0; 1089*8965ea79SLois Curfman McInnes v = amat->a; jj = amat->j; 1090*8965ea79SLois Curfman McInnes for ( j=0; j<amat->nz; j++ ) { 1091*8965ea79SLois Curfman McInnes #if defined(PETSC_COMPLEX) 1092*8965ea79SLois Curfman McInnes tmp[cstart + *jj++ + shift] += abs(*v++); 1093*8965ea79SLois Curfman McInnes #else 1094*8965ea79SLois Curfman McInnes tmp[cstart + *jj++ + shift] += fabs(*v++); 1095*8965ea79SLois Curfman McInnes #endif 1096*8965ea79SLois Curfman McInnes } 1097*8965ea79SLois Curfman McInnes v = bmat->a; jj = bmat->j; 1098*8965ea79SLois Curfman McInnes for ( j=0; j<bmat->nz; j++ ) { 1099*8965ea79SLois Curfman McInnes #if defined(PETSC_COMPLEX) 1100*8965ea79SLois Curfman McInnes tmp[garray[*jj++ + shift]] += abs(*v++); 1101*8965ea79SLois Curfman McInnes #else 1102*8965ea79SLois Curfman McInnes tmp[garray[*jj++ + shift]] += fabs(*v++); 1103*8965ea79SLois Curfman McInnes #endif 1104*8965ea79SLois Curfman McInnes } 1105*8965ea79SLois Curfman McInnes MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm); 1106*8965ea79SLois Curfman McInnes for ( j=0; j<aij->N; j++ ) { 1107*8965ea79SLois Curfman McInnes if (tmp2[j] > *norm) *norm = tmp2[j]; 1108*8965ea79SLois Curfman McInnes } 1109*8965ea79SLois Curfman McInnes PETSCFREE(tmp); PETSCFREE(tmp2); 1110*8965ea79SLois Curfman McInnes } 1111*8965ea79SLois Curfman McInnes else if (type == NORM_INFINITY) { /* max row norm */ 1112*8965ea79SLois Curfman McInnes double normtemp = 0.0; 1113*8965ea79SLois Curfman McInnes for ( j=0; j<amat->m; j++ ) { 1114*8965ea79SLois Curfman McInnes v = amat->a + amat->i[j] + shift; 1115*8965ea79SLois Curfman McInnes sum = 0.0; 1116*8965ea79SLois Curfman McInnes for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1117*8965ea79SLois Curfman McInnes #if defined(PETSC_COMPLEX) 1118*8965ea79SLois Curfman McInnes sum += abs(*v); v++; 1119*8965ea79SLois Curfman McInnes #else 1120*8965ea79SLois Curfman McInnes sum += fabs(*v); v++; 1121*8965ea79SLois Curfman McInnes #endif 1122*8965ea79SLois Curfman McInnes } 1123*8965ea79SLois Curfman McInnes v = bmat->a + bmat->i[j] + shift; 1124*8965ea79SLois Curfman McInnes for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1125*8965ea79SLois Curfman McInnes #if defined(PETSC_COMPLEX) 1126*8965ea79SLois Curfman McInnes sum += abs(*v); v++; 1127*8965ea79SLois Curfman McInnes #else 1128*8965ea79SLois Curfman McInnes sum += fabs(*v); v++; 1129*8965ea79SLois Curfman McInnes #endif 1130*8965ea79SLois Curfman McInnes } 1131*8965ea79SLois Curfman McInnes if (sum > normtemp) normtemp = sum; 1132*8965ea79SLois Curfman McInnes MPI_Allreduce((void*)&normtemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm); 1133*8965ea79SLois Curfman McInnes } 1134*8965ea79SLois Curfman McInnes } 1135*8965ea79SLois Curfman McInnes else { 1136*8965ea79SLois Curfman McInnes SETERRQ(1,"MatNorm_MPIRow:No support for two norm"); 1137*8965ea79SLois Curfman McInnes } 1138*8965ea79SLois Curfman McInnes } 1139*8965ea79SLois Curfman McInnes return 0; 1140*8965ea79SLois Curfman McInnes } 1141*8965ea79SLois Curfman McInnes 1142*8965ea79SLois Curfman McInnes static int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1143*8965ea79SLois Curfman McInnes { 1144*8965ea79SLois Curfman McInnes Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1145*8965ea79SLois Curfman McInnes Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1146*8965ea79SLois Curfman McInnes int ierr,shift = Aloc->indexshift; 1147*8965ea79SLois Curfman McInnes Mat B; 1148*8965ea79SLois Curfman McInnes int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1149*8965ea79SLois Curfman McInnes Scalar *array; 1150*8965ea79SLois Curfman McInnes 1151*8965ea79SLois Curfman McInnes if (!matout && M != N) SETERRQ(1,"MatTranspose_MPIAIJ:Square only in-place"); 1152*8965ea79SLois Curfman McInnes ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,0,0,0,&B); 1153*8965ea79SLois Curfman McInnes CHKERRQ(ierr); 1154*8965ea79SLois Curfman McInnes 1155*8965ea79SLois Curfman McInnes /* copy over the A part */ 1156*8965ea79SLois Curfman McInnes Aloc = (Mat_SeqAIJ*) a->A->data; 1157*8965ea79SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1158*8965ea79SLois Curfman McInnes row = a->rstart; 1159*8965ea79SLois Curfman McInnes for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1160*8965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 1161*8965ea79SLois Curfman McInnes ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1162*8965ea79SLois Curfman McInnes row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1163*8965ea79SLois Curfman McInnes } 1164*8965ea79SLois Curfman McInnes aj = Aloc->j; 1165*8965ea79SLois Curfman McInnes for ( i=0; i<ai[m]|+shift; i++ ) {aj[i] -= a->cstart + shift;} 1166*8965ea79SLois Curfman McInnes 1167*8965ea79SLois Curfman McInnes /* copy over the B part */ 1168*8965ea79SLois Curfman McInnes Aloc = (Mat_SeqAIJ*) a->B->data; 1169*8965ea79SLois Curfman McInnes m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1170*8965ea79SLois Curfman McInnes row = a->rstart; 1171*8965ea79SLois Curfman McInnes ct = cols = (int *) PETSCMALLOC( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1172*8965ea79SLois Curfman McInnes for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1173*8965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 1174*8965ea79SLois Curfman McInnes ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1175*8965ea79SLois Curfman McInnes row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1176*8965ea79SLois Curfman McInnes } 1177*8965ea79SLois Curfman McInnes PETSCFREE(ct); 1178*8965ea79SLois Curfman McInnes ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1179*8965ea79SLois Curfman McInnes ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr); 1180*8965ea79SLois Curfman McInnes if (matout) { 1181*8965ea79SLois Curfman McInnes *matout = B; 1182*8965ea79SLois Curfman McInnes } else { 1183*8965ea79SLois Curfman McInnes /* This isn't really an in-place transpose .... but free data structures from a */ 1184*8965ea79SLois Curfman McInnes PETSCFREE(a->rowners); 1185*8965ea79SLois Curfman McInnes ierr = MatDestroy(a->A); CHKERRQ(ierr); 1186*8965ea79SLois Curfman McInnes ierr = MatDestroy(a->B); CHKERRQ(ierr); 1187*8965ea79SLois Curfman McInnes if (a->colmap) PETSCFREE(a->colmap); 1188*8965ea79SLois Curfman McInnes if (a->garray) PETSCFREE(a->garray); 1189*8965ea79SLois Curfman McInnes if (a->lvec) VecDestroy(a->lvec); 1190*8965ea79SLois Curfman McInnes if (a->Mvctx) VecScatterCtxDestroy(a->Mvctx); 1191*8965ea79SLois Curfman McInnes PETSCFREE(a); 1192*8965ea79SLois Curfman McInnes PetscMemcpy(A,B,sizeof(struct _Mat)); 1193*8965ea79SLois Curfman McInnes PETSCHEADERDESTROY(B); 1194*8965ea79SLois Curfman McInnes } 1195*8965ea79SLois Curfman McInnes return 0; 1196*8965ea79SLois Curfman McInnes } 1197*8965ea79SLois Curfman McInnes 1198*8965ea79SLois Curfman McInnes extern int MatConvert_MPIAIJ(Mat,MatType,Mat *); 1199*8965ea79SLois Curfman McInnes static int MatCopyPrivate_MPIAIJ(Mat,Mat *); 1200*8965ea79SLois Curfman McInnes 1201*8965ea79SLois Curfman McInnes /* -------------------------------------------------------------------*/ 1202*8965ea79SLois Curfman McInnes static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 1203*8965ea79SLois Curfman McInnes MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 1204*8965ea79SLois Curfman McInnes MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 1205*8965ea79SLois Curfman McInnes MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1206*8965ea79SLois Curfman McInnes MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ, 1207*8965ea79SLois Curfman McInnes MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ, 1208*8965ea79SLois Curfman McInnes MatLUFactor_MPIAIJ,0, 1209*8965ea79SLois Curfman McInnes MatRelax_MPIAIJ, 1210*8965ea79SLois Curfman McInnes MatTranspose_MPIAIJ, 1211*8965ea79SLois Curfman McInnes MatGetInfo_MPIAIJ,0, 1212*8965ea79SLois Curfman McInnes MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ, 1213*8965ea79SLois Curfman McInnes MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 1214*8965ea79SLois Curfman McInnes 0, 1215*8965ea79SLois Curfman McInnes MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1216*8965ea79SLois Curfman McInnes MatGetReordering_MPIAIJ, 1217*8965ea79SLois Curfman McInnes MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0, 1218*8965ea79SLois Curfman McInnes MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1219*8965ea79SLois Curfman McInnes MatILUFactorSymbolic_MPIAIJ,0, 1220*8965ea79SLois Curfman McInnes 0,0,MatConvert_MPIAIJ,0,0,MatCopyPrivate_MPIAIJ}; 1221*8965ea79SLois Curfman McInnes 1222*8965ea79SLois Curfman McInnes /*@C 1223*8965ea79SLois Curfman McInnes MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1224*8965ea79SLois Curfman McInnes (the default parallel PETSc format). 1225*8965ea79SLois Curfman McInnes 1226*8965ea79SLois Curfman McInnes Input Parameters: 1227*8965ea79SLois Curfman McInnes . comm - MPI communicator 1228*8965ea79SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1229*8965ea79SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated 1230*8965ea79SLois Curfman McInnes if N is given) 1231*8965ea79SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1232*8965ea79SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated 1233*8965ea79SLois Curfman McInnes if n is given) 1234*8965ea79SLois Curfman McInnes . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1235*8965ea79SLois Curfman McInnes (same for all local rows) 1236*8965ea79SLois Curfman McInnes . d_nzz - number of nonzeros per row in diagonal portion of local submatrix 1237*8965ea79SLois Curfman McInnes or null (possibly different for each row). You must leave room 1238*8965ea79SLois Curfman McInnes for the diagonal entry even if it is zero. 1239*8965ea79SLois Curfman McInnes . o_nz - number of nonzeros per row in off-diagonal portion of local 1240*8965ea79SLois Curfman McInnes submatrix (same for all local rows). 1241*8965ea79SLois Curfman McInnes . o_nzz - number of nonzeros per row in off-diagonal portion of local 1242*8965ea79SLois Curfman McInnes submatrix or null (possibly different for each row). 1243*8965ea79SLois Curfman McInnes 1244*8965ea79SLois Curfman McInnes Output Parameter: 1245*8965ea79SLois Curfman McInnes . newmat - the matrix 1246*8965ea79SLois Curfman McInnes 1247*8965ea79SLois Curfman McInnes Notes: 1248*8965ea79SLois Curfman McInnes The AIJ format (also called the Yale sparse matrix format or 1249*8965ea79SLois Curfman McInnes compressed row storage), is fully compatible with standard Fortran 77 1250*8965ea79SLois Curfman McInnes storage. That is, the stored row and column indices begin at 1251*8965ea79SLois Curfman McInnes one, not zero. See the users manual for further details. 1252*8965ea79SLois Curfman McInnes 1253*8965ea79SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 1254*8965ea79SLois Curfman McInnes (possibly both). 1255*8965ea79SLois Curfman McInnes 1256*8965ea79SLois Curfman McInnes Storage Information: 1257*8965ea79SLois Curfman McInnes For a square global matrix we define each processor's diagonal portion 1258*8965ea79SLois Curfman McInnes to be its local rows and the corresponding columns (a square submatrix); 1259*8965ea79SLois Curfman McInnes each processor's off-diagonal portion encompasses the remainder of the 1260*8965ea79SLois Curfman McInnes local matrix (a rectangular submatrix). 1261*8965ea79SLois Curfman McInnes 1262*8965ea79SLois Curfman McInnes The user can specify preallocated storage for the diagonal part of 1263*8965ea79SLois Curfman McInnes the local submatrix with either d_nz or d_nnz (not both). Set both 1264*8965ea79SLois Curfman McInnes d_nz and d_nnz to zero for PETSc to control dynamic memory allocation. 1265*8965ea79SLois Curfman McInnes Likewise, specify preallocated storage for the off-diagonal part of 1266*8965ea79SLois Curfman McInnes the local submatrix with o_nz or o_nnz (not both). 1267*8965ea79SLois Curfman McInnes 1268*8965ea79SLois Curfman McInnes .keywords: matrix, aij, compressed row, sparse, parallel 1269*8965ea79SLois Curfman McInnes 1270*8965ea79SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1271*8965ea79SLois Curfman McInnes @*/ 1272*8965ea79SLois Curfman McInnes int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 1273*8965ea79SLois Curfman McInnes int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat) 1274*8965ea79SLois Curfman McInnes { 1275*8965ea79SLois Curfman McInnes Mat mat; 1276*8965ea79SLois Curfman McInnes Mat_MPIAIJ *a; 1277*8965ea79SLois Curfman McInnes int ierr, i,sum[2],work[2]; 1278*8965ea79SLois Curfman McInnes 1279*8965ea79SLois Curfman McInnes *newmat = 0; 1280*8965ea79SLois Curfman McInnes PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm); 1281*8965ea79SLois Curfman McInnes PLogObjectCreate(mat); 1282*8965ea79SLois Curfman McInnes mat->data = (void *) (a = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(a); 1283*8965ea79SLois Curfman McInnes PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1284*8965ea79SLois Curfman McInnes mat->destroy = MatDestroy_MPIAIJ; 1285*8965ea79SLois Curfman McInnes mat->view = MatView_MPIAIJ; 1286*8965ea79SLois Curfman McInnes mat->factor = 0; 1287*8965ea79SLois Curfman McInnes 1288*8965ea79SLois Curfman McInnes a->insertmode = NOT_SET_VALUES; 1289*8965ea79SLois Curfman McInnes MPI_Comm_rank(comm,&a->rank); 1290*8965ea79SLois Curfman McInnes MPI_Comm_size(comm,&a->size); 1291*8965ea79SLois Curfman McInnes 1292*8965ea79SLois Curfman McInnes if (m == PETSC_DECIDE && (d_nnz || o_nnz)) 1293*8965ea79SLois Curfman McInnes SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz"); 1294*8965ea79SLois Curfman McInnes 1295*8965ea79SLois Curfman McInnes if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1296*8965ea79SLois Curfman McInnes work[0] = m; work[1] = n; 1297*8965ea79SLois Curfman McInnes MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 1298*8965ea79SLois Curfman McInnes if (M == PETSC_DECIDE) M = sum[0]; 1299*8965ea79SLois Curfman McInnes if (N == PETSC_DECIDE) N = sum[1]; 1300*8965ea79SLois Curfman McInnes } 1301*8965ea79SLois Curfman McInnes if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);} 1302*8965ea79SLois Curfman McInnes if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);} 1303*8965ea79SLois Curfman McInnes a->m = m; 1304*8965ea79SLois Curfman McInnes a->n = n; 1305*8965ea79SLois Curfman McInnes a->N = N; 1306*8965ea79SLois Curfman McInnes a->M = M; 1307*8965ea79SLois Curfman McInnes 1308*8965ea79SLois Curfman McInnes /* build local table of row and column ownerships */ 1309*8965ea79SLois Curfman McInnes a->rowners = (int *) PETSCMALLOC(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1310*8965ea79SLois Curfman McInnes PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+ 1311*8965ea79SLois Curfman McInnes sizeof(Mat_MPIAIJ)); 1312*8965ea79SLois Curfman McInnes a->cowners = a->rowners + a->size + 1; 1313*8965ea79SLois Curfman McInnes MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm); 1314*8965ea79SLois Curfman McInnes a->rowners[0] = 0; 1315*8965ea79SLois Curfman McInnes for ( i=2; i<=a->size; i++ ) { 1316*8965ea79SLois Curfman McInnes a->rowners[i] += a->rowners[i-1]; 1317*8965ea79SLois Curfman McInnes } 1318*8965ea79SLois Curfman McInnes a->rstart = a->rowners[a->rank]; 1319*8965ea79SLois Curfman McInnes a->rend = a->rowners[a->rank+1]; 1320*8965ea79SLois Curfman McInnes MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm); 1321*8965ea79SLois Curfman McInnes a->cowners[0] = 0; 1322*8965ea79SLois Curfman McInnes for ( i=2; i<=a->size; i++ ) { 1323*8965ea79SLois Curfman McInnes a->cowners[i] += a->cowners[i-1]; 1324*8965ea79SLois Curfman McInnes } 1325*8965ea79SLois Curfman McInnes a->cstart = a->cowners[a->rank]; 1326*8965ea79SLois Curfman McInnes a->cend = a->cowners[a->rank+1]; 1327*8965ea79SLois Curfman McInnes 1328*8965ea79SLois Curfman McInnes ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr); 1329*8965ea79SLois Curfman McInnes PLogObjectParent(mat,a->A); 1330*8965ea79SLois Curfman McInnes ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr); 1331*8965ea79SLois Curfman McInnes PLogObjectParent(mat,a->B); 1332*8965ea79SLois Curfman McInnes 1333*8965ea79SLois Curfman McInnes /* build cache for off array entries formed */ 1334*8965ea79SLois Curfman McInnes ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr); 1335*8965ea79SLois Curfman McInnes a->colmap = 0; 1336*8965ea79SLois Curfman McInnes a->garray = 0; 1337*8965ea79SLois Curfman McInnes 1338*8965ea79SLois Curfman McInnes /* stuff used for matrix vector multiply */ 1339*8965ea79SLois Curfman McInnes a->lvec = 0; 1340*8965ea79SLois Curfman McInnes a->Mvctx = 0; 1341*8965ea79SLois Curfman McInnes a->assembled = 0; 1342*8965ea79SLois Curfman McInnes 1343*8965ea79SLois Curfman McInnes *newmat = mat; 1344*8965ea79SLois Curfman McInnes return 0; 1345*8965ea79SLois Curfman McInnes } 1346*8965ea79SLois Curfman McInnes 1347*8965ea79SLois Curfman McInnes static int MatCopyPrivate_MPIAIJ(Mat matin,Mat *newmat) 1348*8965ea79SLois Curfman McInnes { 1349*8965ea79SLois Curfman McInnes Mat mat; 1350*8965ea79SLois Curfman McInnes Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1351*8965ea79SLois Curfman McInnes int ierr, len; 1352*8965ea79SLois Curfman McInnes 1353*8965ea79SLois Curfman McInnes if (!oldmat->assembled) SETERRQ(1,"MatCopyPrivate_MPIAIJ:Must assemble matrix"); 1354*8965ea79SLois Curfman McInnes *newmat = 0; 1355*8965ea79SLois Curfman McInnes PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm); 1356*8965ea79SLois Curfman McInnes PLogObjectCreate(mat); 1357*8965ea79SLois Curfman McInnes mat->data = (void *) (a = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(a); 1358*8965ea79SLois Curfman McInnes PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1359*8965ea79SLois Curfman McInnes mat->destroy = MatDestroy_MPIAIJ; 1360*8965ea79SLois Curfman McInnes mat->view = MatView_MPIAIJ; 1361*8965ea79SLois Curfman McInnes mat->factor = matin->factor; 1362*8965ea79SLois Curfman McInnes 1363*8965ea79SLois Curfman McInnes a->m = oldmat->m; 1364*8965ea79SLois Curfman McInnes a->n = oldmat->n; 1365*8965ea79SLois Curfman McInnes a->M = oldmat->M; 1366*8965ea79SLois Curfman McInnes a->N = oldmat->N; 1367*8965ea79SLois Curfman McInnes 1368*8965ea79SLois Curfman McInnes a->assembled = 1; 1369*8965ea79SLois Curfman McInnes a->rstart = oldmat->rstart; 1370*8965ea79SLois Curfman McInnes a->rend = oldmat->rend; 1371*8965ea79SLois Curfman McInnes a->cstart = oldmat->cstart; 1372*8965ea79SLois Curfman McInnes a->cend = oldmat->cend; 1373*8965ea79SLois Curfman McInnes a->size = oldmat->size; 1374*8965ea79SLois Curfman McInnes a->rank = oldmat->rank; 1375*8965ea79SLois Curfman McInnes a->insertmode = NOT_SET_VALUES; 1376*8965ea79SLois Curfman McInnes 1377*8965ea79SLois Curfman McInnes a->rowners = (int *) PETSCMALLOC((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners); 1378*8965ea79SLois Curfman McInnes PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ)); 1379*8965ea79SLois Curfman McInnes PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int)); 1380*8965ea79SLois Curfman McInnes ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1381*8965ea79SLois Curfman McInnes if (oldmat->colmap) { 1382*8965ea79SLois Curfman McInnes a->colmap = (int *) PETSCMALLOC((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1383*8965ea79SLois Curfman McInnes PLogObjectMemory(mat,(a->N)*sizeof(int)); 1384*8965ea79SLois Curfman McInnes PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1385*8965ea79SLois Curfman McInnes } else a->colmap = 0; 1386*8965ea79SLois Curfman McInnes if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) { 1387*8965ea79SLois Curfman McInnes a->garray = (int *) PETSCMALLOC(len*sizeof(int)); CHKPTRQ(a->garray); 1388*8965ea79SLois Curfman McInnes PLogObjectMemory(mat,len*sizeof(int)); 1389*8965ea79SLois Curfman McInnes PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1390*8965ea79SLois Curfman McInnes } else a->garray = 0; 1391*8965ea79SLois Curfman McInnes 1392*8965ea79SLois Curfman McInnes ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1393*8965ea79SLois Curfman McInnes PLogObjectParent(mat,a->lvec); 1394*8965ea79SLois Curfman McInnes ierr = VecScatterCtxCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1395*8965ea79SLois Curfman McInnes PLogObjectParent(mat,a->Mvctx); 1396*8965ea79SLois Curfman McInnes ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1397*8965ea79SLois Curfman McInnes PLogObjectParent(mat,a->A); 1398*8965ea79SLois Curfman McInnes ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1399*8965ea79SLois Curfman McInnes PLogObjectParent(mat,a->B); 1400*8965ea79SLois Curfman McInnes *newmat = mat; 1401*8965ea79SLois Curfman McInnes return 0; 1402*8965ea79SLois Curfman McInnes } 1403*8965ea79SLois Curfman McInnes 1404*8965ea79SLois Curfman McInnes #include "sysio.h" 1405*8965ea79SLois Curfman McInnes 1406*8965ea79SLois Curfman McInnes int MatLoad_MPIAIJorMPIRow(Viewer bview,MatType type,Mat *newmat) 1407*8965ea79SLois Curfman McInnes { 1408*8965ea79SLois Curfman McInnes Mat A; 1409*8965ea79SLois Curfman McInnes int i, nz, ierr, j,rstart, rend, fd; 1410*8965ea79SLois Curfman McInnes Scalar *vals,*svals; 1411*8965ea79SLois Curfman McInnes PetscObject vobj = (PetscObject) bview; 1412*8965ea79SLois Curfman McInnes MPI_Comm comm = vobj->comm; 1413*8965ea79SLois Curfman McInnes MPI_Status status; 1414*8965ea79SLois Curfman McInnes int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1415*8965ea79SLois Curfman McInnes int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1416*8965ea79SLois Curfman McInnes int tag = ((PetscObject)bview)->tag; 1417*8965ea79SLois Curfman McInnes 1418*8965ea79SLois Curfman McInnes MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 1419*8965ea79SLois Curfman McInnes if (!rank) { 1420*8965ea79SLois Curfman McInnes ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr); 1421*8965ea79SLois Curfman McInnes ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr); 1422*8965ea79SLois Curfman McInnes if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:not matrix object"); 1423*8965ea79SLois Curfman McInnes } 1424*8965ea79SLois Curfman McInnes 1425*8965ea79SLois Curfman McInnes MPI_Bcast(header+1,3,MPI_INT,0,comm); 1426*8965ea79SLois Curfman McInnes M = header[1]; N = header[2]; 1427*8965ea79SLois Curfman McInnes /* determine ownership of all rows */ 1428*8965ea79SLois Curfman McInnes m = M/size + ((M % size) > rank); 1429*8965ea79SLois Curfman McInnes rowners = (int *) PETSCMALLOC((size+2)*sizeof(int)); CHKPTRQ(rowners); 1430*8965ea79SLois Curfman McInnes MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); 1431*8965ea79SLois Curfman McInnes rowners[0] = 0; 1432*8965ea79SLois Curfman McInnes for ( i=2; i<=size; i++ ) { 1433*8965ea79SLois Curfman McInnes rowners[i] += rowners[i-1]; 1434*8965ea79SLois Curfman McInnes } 1435*8965ea79SLois Curfman McInnes rstart = rowners[rank]; 1436*8965ea79SLois Curfman McInnes rend = rowners[rank+1]; 1437*8965ea79SLois Curfman McInnes 1438*8965ea79SLois Curfman McInnes /* distribute row lengths to all processors */ 1439*8965ea79SLois Curfman McInnes ourlens = (int*) PETSCMALLOC( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1440*8965ea79SLois Curfman McInnes offlens = ourlens + (rend-rstart); 1441*8965ea79SLois Curfman McInnes if (!rank) { 1442*8965ea79SLois Curfman McInnes rowlengths = (int*) PETSCMALLOC( M*sizeof(int) ); CHKPTRQ(rowlengths); 1443*8965ea79SLois Curfman McInnes ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr); 1444*8965ea79SLois Curfman McInnes sndcounts = (int*) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(sndcounts); 1445*8965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1446*8965ea79SLois Curfman McInnes MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); 1447*8965ea79SLois Curfman McInnes PETSCFREE(sndcounts); 1448*8965ea79SLois Curfman McInnes } 1449*8965ea79SLois Curfman McInnes else { 1450*8965ea79SLois Curfman McInnes MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm); 1451*8965ea79SLois Curfman McInnes } 1452*8965ea79SLois Curfman McInnes 1453*8965ea79SLois Curfman McInnes if (!rank) { 1454*8965ea79SLois Curfman McInnes /* calculate the number of nonzeros on each processor */ 1455*8965ea79SLois Curfman McInnes procsnz = (int*) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(procsnz); 1456*8965ea79SLois Curfman McInnes PetscZero(procsnz,size*sizeof(int)); 1457*8965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 1458*8965ea79SLois Curfman McInnes for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1459*8965ea79SLois Curfman McInnes procsnz[i] += rowlengths[j]; 1460*8965ea79SLois Curfman McInnes } 1461*8965ea79SLois Curfman McInnes } 1462*8965ea79SLois Curfman McInnes PETSCFREE(rowlengths); 1463*8965ea79SLois Curfman McInnes 1464*8965ea79SLois Curfman McInnes /* determine max buffer needed and allocate it */ 1465*8965ea79SLois Curfman McInnes maxnz = 0; 1466*8965ea79SLois Curfman McInnes for ( i=0; i<size; i++ ) { 1467*8965ea79SLois Curfman McInnes maxnz = PETSCMAX(maxnz,procsnz[i]); 1468*8965ea79SLois Curfman McInnes } 1469*8965ea79SLois Curfman McInnes cols = (int *) PETSCMALLOC( maxnz*sizeof(int) ); CHKPTRQ(cols); 1470*8965ea79SLois Curfman McInnes 1471*8965ea79SLois Curfman McInnes /* read in my part of the matrix column indices */ 1472*8965ea79SLois Curfman McInnes nz = procsnz[0]; 1473*8965ea79SLois Curfman McInnes mycols = (int *) PETSCMALLOC( nz*sizeof(int) ); CHKPTRQ(mycols); 1474*8965ea79SLois Curfman McInnes ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr); 1475*8965ea79SLois Curfman McInnes 1476*8965ea79SLois Curfman McInnes /* read in every one elses and ship off */ 1477*8965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { 1478*8965ea79SLois Curfman McInnes nz = procsnz[i]; 1479*8965ea79SLois Curfman McInnes ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr); 1480*8965ea79SLois Curfman McInnes MPI_Send(cols,nz,MPI_INT,i,tag,comm); 1481*8965ea79SLois Curfman McInnes } 1482*8965ea79SLois Curfman McInnes PETSCFREE(cols); 1483*8965ea79SLois Curfman McInnes } 1484*8965ea79SLois Curfman McInnes else { 1485*8965ea79SLois Curfman McInnes /* determine buffer space needed for message */ 1486*8965ea79SLois Curfman McInnes nz = 0; 1487*8965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 1488*8965ea79SLois Curfman McInnes nz += ourlens[i]; 1489*8965ea79SLois Curfman McInnes } 1490*8965ea79SLois Curfman McInnes mycols = (int*) PETSCMALLOC( nz*sizeof(int) ); CHKPTRQ(mycols); 1491*8965ea79SLois Curfman McInnes 1492*8965ea79SLois Curfman McInnes /* receive message of column indices*/ 1493*8965ea79SLois Curfman McInnes MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 1494*8965ea79SLois Curfman McInnes MPI_Get_count(&status,MPI_INT,&maxnz); 1495*8965ea79SLois Curfman McInnes if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file"); 1496*8965ea79SLois Curfman McInnes } 1497*8965ea79SLois Curfman McInnes 1498*8965ea79SLois Curfman McInnes /* loop over local rows, determining number of off diagonal entries */ 1499*8965ea79SLois Curfman McInnes PetscZero(offlens,m*sizeof(int)); 1500*8965ea79SLois Curfman McInnes jj = 0; 1501*8965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 1502*8965ea79SLois Curfman McInnes for ( j=0; j<ourlens[i]; j++ ) { 1503*8965ea79SLois Curfman McInnes if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1504*8965ea79SLois Curfman McInnes jj++; 1505*8965ea79SLois Curfman McInnes } 1506*8965ea79SLois Curfman McInnes } 1507*8965ea79SLois Curfman McInnes 1508*8965ea79SLois Curfman McInnes /* create our matrix */ 1509*8965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 1510*8965ea79SLois Curfman McInnes ourlens[i] -= offlens[i]; 1511*8965ea79SLois Curfman McInnes } 1512*8965ea79SLois Curfman McInnes if (type == MATMPIAIJ) { 1513*8965ea79SLois Curfman McInnes ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1514*8965ea79SLois Curfman McInnes } 1515*8965ea79SLois Curfman McInnes else if (type == MATMPIROW) { 1516*8965ea79SLois Curfman McInnes ierr = MatCreateMPIRow(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1517*8965ea79SLois Curfman McInnes } 1518*8965ea79SLois Curfman McInnes A = *newmat; 1519*8965ea79SLois Curfman McInnes MatSetOption(A,COLUMNS_SORTED); 1520*8965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 1521*8965ea79SLois Curfman McInnes ourlens[i] += offlens[i]; 1522*8965ea79SLois Curfman McInnes } 1523*8965ea79SLois Curfman McInnes 1524*8965ea79SLois Curfman McInnes if (!rank) { 1525*8965ea79SLois Curfman McInnes vals = (Scalar *) PETSCMALLOC( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1526*8965ea79SLois Curfman McInnes 1527*8965ea79SLois Curfman McInnes /* read in my part of the matrix numerical values */ 1528*8965ea79SLois Curfman McInnes nz = procsnz[0]; 1529*8965ea79SLois Curfman McInnes ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 1530*8965ea79SLois Curfman McInnes 1531*8965ea79SLois Curfman McInnes /* insert into matrix */ 1532*8965ea79SLois Curfman McInnes jj = rstart; 1533*8965ea79SLois Curfman McInnes smycols = mycols; 1534*8965ea79SLois Curfman McInnes svals = vals; 1535*8965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 1536*8965ea79SLois Curfman McInnes ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1537*8965ea79SLois Curfman McInnes smycols += ourlens[i]; 1538*8965ea79SLois Curfman McInnes svals += ourlens[i]; 1539*8965ea79SLois Curfman McInnes jj++; 1540*8965ea79SLois Curfman McInnes } 1541*8965ea79SLois Curfman McInnes 1542*8965ea79SLois Curfman McInnes /* read in other processors and ship out */ 1543*8965ea79SLois Curfman McInnes for ( i=1; i<size; i++ ) { 1544*8965ea79SLois Curfman McInnes nz = procsnz[i]; 1545*8965ea79SLois Curfman McInnes ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr); 1546*8965ea79SLois Curfman McInnes MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 1547*8965ea79SLois Curfman McInnes } 1548*8965ea79SLois Curfman McInnes PETSCFREE(procsnz); 1549*8965ea79SLois Curfman McInnes } 1550*8965ea79SLois Curfman McInnes else { 1551*8965ea79SLois Curfman McInnes /* receive numeric values */ 1552*8965ea79SLois Curfman McInnes vals = (Scalar*) PETSCMALLOC( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1553*8965ea79SLois Curfman McInnes 1554*8965ea79SLois Curfman McInnes /* receive message of values*/ 1555*8965ea79SLois Curfman McInnes MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 1556*8965ea79SLois Curfman McInnes MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1557*8965ea79SLois Curfman McInnes if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file"); 1558*8965ea79SLois Curfman McInnes 1559*8965ea79SLois Curfman McInnes /* insert into matrix */ 1560*8965ea79SLois Curfman McInnes jj = rstart; 1561*8965ea79SLois Curfman McInnes smycols = mycols; 1562*8965ea79SLois Curfman McInnes svals = vals; 1563*8965ea79SLois Curfman McInnes for ( i=0; i<m; i++ ) { 1564*8965ea79SLois Curfman McInnes ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1565*8965ea79SLois Curfman McInnes smycols += ourlens[i]; 1566*8965ea79SLois Curfman McInnes svals += ourlens[i]; 1567*8965ea79SLois Curfman McInnes jj++; 1568*8965ea79SLois Curfman McInnes } 1569*8965ea79SLois Curfman McInnes } 1570*8965ea79SLois Curfman McInnes PETSCFREE(ourlens); PETSCFREE(vals); PETSCFREE(mycols); PETSCFREE(rowners); 1571*8965ea79SLois Curfman McInnes 1572*8965ea79SLois Curfman McInnes ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1573*8965ea79SLois Curfman McInnes ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 1574*8965ea79SLois Curfman McInnes return 0; 1575*8965ea79SLois Curfman McInnes } 1576