179bdfe76SSatish Balay #ifndef lint 2*cee3aa6bSSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.3 1996/06/19 23:03:32 balay Exp balay $"; 379bdfe76SSatish Balay #endif 479bdfe76SSatish Balay 579bdfe76SSatish Balay #include "mpibaij.h" 679bdfe76SSatish Balay 779bdfe76SSatish Balay 857b952d6SSatish Balay #include "draw.h" 957b952d6SSatish Balay #include "pinclude/pviewer.h" 1057b952d6SSatish Balay 1157b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat); 1257b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat); 1357b952d6SSatish Balay 1457b952d6SSatish Balay /* local utility routine that creates a mapping from the global column 1557b952d6SSatish Balay number to the local number in the off-diagonal part of the local 1657b952d6SSatish Balay storage of the matrix. This is done in a non scable way since the 1757b952d6SSatish Balay length of colmap equals the global matrix length. 1857b952d6SSatish Balay */ 1957b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat) 2057b952d6SSatish Balay { 2157b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 2257b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 2357b952d6SSatish Balay int nbs = B->nbs,i; 2457b952d6SSatish Balay 2557b952d6SSatish Balay baij->colmap = (int *) PetscMalloc(baij->Nbs*sizeof(int));CHKPTRQ(baij->colmap); 2657b952d6SSatish Balay PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 2757b952d6SSatish Balay PetscMemzero(baij->colmap,baij->Nbs*sizeof(int)); 2857b952d6SSatish Balay for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i; 2957b952d6SSatish Balay return 0; 3057b952d6SSatish Balay } 3157b952d6SSatish Balay 3257b952d6SSatish Balay 3357b952d6SSatish Balay static int MatGetReordering_MPIBAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm) 3457b952d6SSatish Balay { 3557b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 3657b952d6SSatish Balay int ierr; 3757b952d6SSatish Balay if (baij->size == 1) { 3857b952d6SSatish Balay ierr = MatGetReordering(baij->A,type,rperm,cperm); CHKERRQ(ierr); 3957b952d6SSatish Balay } else SETERRQ(1,"MatGetReordering_MPIBAIJ:not supported in parallel"); 4057b952d6SSatish Balay return 0; 4157b952d6SSatish Balay } 4257b952d6SSatish Balay 4357b952d6SSatish Balay static int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 4457b952d6SSatish Balay { 4557b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 4657b952d6SSatish Balay Scalar value; 4757b952d6SSatish Balay int ierr,i,j, rstart = baij->rstart, rend = baij->rend; 4857b952d6SSatish Balay int cstart = baij->cstart, cend = baij->cend,row,col; 4957b952d6SSatish Balay int roworiented = baij->roworiented,rstart_orig,rend_orig; 5057b952d6SSatish Balay int cstart_orig,cend_orig,bs=baij->bs; 5157b952d6SSatish Balay 5257b952d6SSatish Balay if (baij->insertmode != NOT_SET_VALUES && baij->insertmode != addv) { 5357b952d6SSatish Balay SETERRQ(1,"MatSetValues_MPIBAIJ:Cannot mix inserts and adds"); 5457b952d6SSatish Balay } 5557b952d6SSatish Balay baij->insertmode = addv; 5657b952d6SSatish Balay rstart_orig = rstart*bs; 5757b952d6SSatish Balay rend_orig = rend*bs; 5857b952d6SSatish Balay cstart_orig = cstart*bs; 5957b952d6SSatish Balay cend_orig = cend*bs; 6057b952d6SSatish Balay for ( i=0; i<m; i++ ) { 6157b952d6SSatish Balay if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIBAIJ:Negative row"); 6257b952d6SSatish Balay if (im[i] >= baij->M) SETERRQ(1,"MatSetValues_MPIBAIJ:Row too large"); 6357b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 6457b952d6SSatish Balay row = im[i] - rstart_orig; 6557b952d6SSatish Balay for ( j=0; j<n; j++ ) { 6657b952d6SSatish Balay if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIBAIJ:Negative column"); 6757b952d6SSatish Balay if (in[j] >= baij->N) SETERRQ(1,"MatSetValues_MPIBAIJ:Col too large"); 6857b952d6SSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig){ 6957b952d6SSatish Balay col = in[j] - cstart_orig; 7057b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 7157b952d6SSatish Balay ierr = MatSetValues(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 7257b952d6SSatish Balay } 7357b952d6SSatish Balay else { 7457b952d6SSatish Balay if (mat->was_assembled) { 7557b952d6SSatish Balay if (!baij->colmap) {ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);} 7657b952d6SSatish Balay col = baij->colmap[in[j]]; 7757b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 7857b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 7957b952d6SSatish Balay col = in[j]; 8057b952d6SSatish Balay } 8157b952d6SSatish Balay } 8257b952d6SSatish Balay else col = in[j]; 8357b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 8457b952d6SSatish Balay ierr = MatSetValues(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 8557b952d6SSatish Balay } 8657b952d6SSatish Balay } 8757b952d6SSatish Balay } 8857b952d6SSatish Balay else { 8957b952d6SSatish Balay if (roworiented) { 9057b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 9157b952d6SSatish Balay } 9257b952d6SSatish Balay else { 9357b952d6SSatish Balay row = im[i]; 9457b952d6SSatish Balay for ( j=0; j<n; j++ ) { 9557b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 9657b952d6SSatish Balay } 9757b952d6SSatish Balay } 9857b952d6SSatish Balay } 9957b952d6SSatish Balay } 10057b952d6SSatish Balay return 0; 10157b952d6SSatish Balay } 10257b952d6SSatish Balay 10357b952d6SSatish Balay 10457b952d6SSatish Balay static int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 10557b952d6SSatish Balay { 10657b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 10757b952d6SSatish Balay MPI_Comm comm = mat->comm; 10857b952d6SSatish Balay int size = baij->size, *owners = baij->rowners,bs=baij->bs; 10957b952d6SSatish Balay int rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr; 11057b952d6SSatish Balay MPI_Request *send_waits,*recv_waits; 11157b952d6SSatish Balay int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 11257b952d6SSatish Balay InsertMode addv; 11357b952d6SSatish Balay Scalar *rvalues,*svalues; 11457b952d6SSatish Balay 11557b952d6SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 11657b952d6SSatish Balay MPI_Allreduce(&baij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 11757b952d6SSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 11857b952d6SSatish Balay SETERRQ(1,"MatAssemblyBegin_MPIBAIJ:Some processors inserted others added"); 11957b952d6SSatish Balay } 12057b952d6SSatish Balay baij->insertmode = addv; /* in case this processor had no cache */ 12157b952d6SSatish Balay 12257b952d6SSatish Balay /* first count number of contributors to each processor */ 12357b952d6SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 12457b952d6SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 12557b952d6SSatish Balay owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 12657b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 12757b952d6SSatish Balay idx = baij->stash.idx[i]; 12857b952d6SSatish Balay for ( j=0; j<size; j++ ) { 12957b952d6SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 13057b952d6SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; break; 13157b952d6SSatish Balay } 13257b952d6SSatish Balay } 13357b952d6SSatish Balay } 13457b952d6SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 13557b952d6SSatish Balay 13657b952d6SSatish Balay /* inform other processors of number of messages and max length*/ 13757b952d6SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 13857b952d6SSatish Balay MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 13957b952d6SSatish Balay nreceives = work[rank]; 14057b952d6SSatish Balay MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 14157b952d6SSatish Balay nmax = work[rank]; 14257b952d6SSatish Balay PetscFree(work); 14357b952d6SSatish Balay 14457b952d6SSatish Balay /* post receives: 14557b952d6SSatish Balay 1) each message will consist of ordered pairs 14657b952d6SSatish Balay (global index,value) we store the global index as a double 14757b952d6SSatish Balay to simplify the message passing. 14857b952d6SSatish Balay 2) since we don't know how long each individual message is we 14957b952d6SSatish Balay allocate the largest needed buffer for each receive. Potentially 15057b952d6SSatish Balay this is a lot of wasted space. 15157b952d6SSatish Balay 15257b952d6SSatish Balay 15357b952d6SSatish Balay This could be done better. 15457b952d6SSatish Balay */ 15557b952d6SSatish Balay rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 15657b952d6SSatish Balay CHKPTRQ(rvalues); 15757b952d6SSatish Balay recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 15857b952d6SSatish Balay CHKPTRQ(recv_waits); 15957b952d6SSatish Balay for ( i=0; i<nreceives; i++ ) { 16057b952d6SSatish Balay MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 16157b952d6SSatish Balay comm,recv_waits+i); 16257b952d6SSatish Balay } 16357b952d6SSatish Balay 16457b952d6SSatish Balay /* do sends: 16557b952d6SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 16657b952d6SSatish Balay the ith processor 16757b952d6SSatish Balay */ 16857b952d6SSatish Balay svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 16957b952d6SSatish Balay send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 17057b952d6SSatish Balay CHKPTRQ(send_waits); 17157b952d6SSatish Balay starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 17257b952d6SSatish Balay starts[0] = 0; 17357b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 17457b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 17557b952d6SSatish Balay svalues[3*starts[owner[i]]] = (Scalar) baij->stash.idx[i]; 17657b952d6SSatish Balay svalues[3*starts[owner[i]]+1] = (Scalar) baij->stash.idy[i]; 17757b952d6SSatish Balay svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i]; 17857b952d6SSatish Balay } 17957b952d6SSatish Balay PetscFree(owner); 18057b952d6SSatish Balay starts[0] = 0; 18157b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 18257b952d6SSatish Balay count = 0; 18357b952d6SSatish Balay for ( i=0; i<size; i++ ) { 18457b952d6SSatish Balay if (procs[i]) { 18557b952d6SSatish Balay MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 18657b952d6SSatish Balay comm,send_waits+count++); 18757b952d6SSatish Balay } 18857b952d6SSatish Balay } 18957b952d6SSatish Balay PetscFree(starts); PetscFree(nprocs); 19057b952d6SSatish Balay 19157b952d6SSatish Balay /* Free cache space */ 19257b952d6SSatish Balay PLogInfo(0,"[%d]MatAssemblyBegin_MPIBAIJ:Number of off processor values %d\n",rank,baij->stash.n); 19357b952d6SSatish Balay ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 19457b952d6SSatish Balay 19557b952d6SSatish Balay baij->svalues = svalues; baij->rvalues = rvalues; 19657b952d6SSatish Balay baij->nsends = nsends; baij->nrecvs = nreceives; 19757b952d6SSatish Balay baij->send_waits = send_waits; baij->recv_waits = recv_waits; 19857b952d6SSatish Balay baij->rmax = nmax; 19957b952d6SSatish Balay 20057b952d6SSatish Balay return 0; 20157b952d6SSatish Balay } 20257b952d6SSatish Balay 20357b952d6SSatish Balay 20457b952d6SSatish Balay static int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 20557b952d6SSatish Balay { 20657b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 20757b952d6SSatish Balay MPI_Status *send_status,recv_status; 20857b952d6SSatish Balay int imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr; 20957b952d6SSatish Balay int bs=baij->bs,row,col,other_disassembled; 21057b952d6SSatish Balay Scalar *values,val; 21157b952d6SSatish Balay InsertMode addv = baij->insertmode; 21257b952d6SSatish Balay 21357b952d6SSatish Balay /* wait on receives */ 21457b952d6SSatish Balay while (count) { 21557b952d6SSatish Balay MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status); 21657b952d6SSatish Balay /* unpack receives into our local space */ 21757b952d6SSatish Balay values = baij->rvalues + 3*imdex*baij->rmax; 21857b952d6SSatish Balay MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 21957b952d6SSatish Balay n = n/3; 22057b952d6SSatish Balay for ( i=0; i<n; i++ ) { 22157b952d6SSatish Balay row = (int) PetscReal(values[3*i]) - baij->rstart*bs; 22257b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 22357b952d6SSatish Balay val = values[3*i+2]; 22457b952d6SSatish Balay if (col >= baij->cstart*bs && col < baij->cend*bs) { 22557b952d6SSatish Balay col -= baij->cstart*bs; 22657b952d6SSatish Balay MatSetValues(baij->A,1,&row,1,&col,&val,addv); 22757b952d6SSatish Balay } 22857b952d6SSatish Balay else { 22957b952d6SSatish Balay if (mat->was_assembled) { 23057b952d6SSatish Balay if (!baij->colmap) {ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);} 23157b952d6SSatish Balay col = baij->colmap[col/bs]*bs + col%bs; 23257b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 23357b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 23457b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 23557b952d6SSatish Balay } 23657b952d6SSatish Balay } 23757b952d6SSatish Balay MatSetValues(baij->B,1,&row,1,&col,&val,addv); 23857b952d6SSatish Balay } 23957b952d6SSatish Balay } 24057b952d6SSatish Balay count--; 24157b952d6SSatish Balay } 24257b952d6SSatish Balay PetscFree(baij->recv_waits); PetscFree(baij->rvalues); 24357b952d6SSatish Balay 24457b952d6SSatish Balay /* wait on sends */ 24557b952d6SSatish Balay if (baij->nsends) { 24657b952d6SSatish Balay send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status)); 24757b952d6SSatish Balay CHKPTRQ(send_status); 24857b952d6SSatish Balay MPI_Waitall(baij->nsends,baij->send_waits,send_status); 24957b952d6SSatish Balay PetscFree(send_status); 25057b952d6SSatish Balay } 25157b952d6SSatish Balay PetscFree(baij->send_waits); PetscFree(baij->svalues); 25257b952d6SSatish Balay 25357b952d6SSatish Balay baij->insertmode = NOT_SET_VALUES; 25457b952d6SSatish Balay ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 25557b952d6SSatish Balay ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 25657b952d6SSatish Balay 25757b952d6SSatish Balay /* determine if any processor has disassembled, if so we must 25857b952d6SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 25957b952d6SSatish Balay MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 26057b952d6SSatish Balay if (mat->was_assembled && !other_disassembled) { 26157b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 26257b952d6SSatish Balay } 26357b952d6SSatish Balay 26457b952d6SSatish Balay if (!mat->was_assembled && mode == FINAL_ASSEMBLY) { 26557b952d6SSatish Balay ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 26657b952d6SSatish Balay } 26757b952d6SSatish Balay ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 26857b952d6SSatish Balay ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 26957b952d6SSatish Balay 27057b952d6SSatish Balay if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 27157b952d6SSatish Balay return 0; 27257b952d6SSatish Balay } 27357b952d6SSatish Balay 27457b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 27557b952d6SSatish Balay { 27657b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 27757b952d6SSatish Balay int ierr; 27857b952d6SSatish Balay 27957b952d6SSatish Balay if (baij->size == 1) { 28057b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 28157b952d6SSatish Balay } 28257b952d6SSatish Balay else SETERRQ(1,"MatView_MPIBAIJ_Binary:Only uniprocessor output supported"); 28357b952d6SSatish Balay return 0; 28457b952d6SSatish Balay } 28557b952d6SSatish Balay 28657b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 28757b952d6SSatish Balay { 28857b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 289*cee3aa6bSSatish Balay int ierr, format,rank,bs=baij->bs; 29057b952d6SSatish Balay FILE *fd; 29157b952d6SSatish Balay ViewerType vtype; 29257b952d6SSatish Balay 29357b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 29457b952d6SSatish Balay if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 29557b952d6SSatish Balay ierr = ViewerGetFormat(viewer,&format); 29657b952d6SSatish Balay if (format == ASCII_FORMAT_INFO_DETAILED) { 29757b952d6SSatish Balay int nz, nzalloc, mem; 29857b952d6SSatish Balay MPI_Comm_rank(mat->comm,&rank); 29957b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 30057b952d6SSatish Balay ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); 30157b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 30257b952d6SSatish Balay fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 30357b952d6SSatish Balay rank,baij->m,nz*bs,nzalloc*bs,baij->bs,mem); 30457b952d6SSatish Balay ierr = MatGetInfo(baij->A,MAT_LOCAL,&nz,&nzalloc,&mem); 30557b952d6SSatish Balay fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,nz*bs); 30657b952d6SSatish Balay ierr = MatGetInfo(baij->B,MAT_LOCAL,&nz,&nzalloc,&mem); 30757b952d6SSatish Balay fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,nz*bs); 30857b952d6SSatish Balay fflush(fd); 30957b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 31057b952d6SSatish Balay ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 31157b952d6SSatish Balay return 0; 31257b952d6SSatish Balay } 31357b952d6SSatish Balay else if (format == ASCII_FORMAT_INFO) { 31457b952d6SSatish Balay return 0; 31557b952d6SSatish Balay } 31657b952d6SSatish Balay } 31757b952d6SSatish Balay 31857b952d6SSatish Balay if (vtype == DRAW_VIEWER) { 31957b952d6SSatish Balay Draw draw; 32057b952d6SSatish Balay PetscTruth isnull; 32157b952d6SSatish Balay ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 32257b952d6SSatish Balay ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 32357b952d6SSatish Balay } 32457b952d6SSatish Balay 32557b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER) { 32657b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 32757b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 32857b952d6SSatish Balay fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 32957b952d6SSatish Balay baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n, 33057b952d6SSatish Balay baij->cstart*bs,baij->cend*bs); 33157b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 33257b952d6SSatish Balay ierr = MatView(baij->B,viewer); CHKERRQ(ierr); 33357b952d6SSatish Balay fflush(fd); 33457b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 33557b952d6SSatish Balay } 33657b952d6SSatish Balay else { 33757b952d6SSatish Balay int size = baij->size; 33857b952d6SSatish Balay rank = baij->rank; 33957b952d6SSatish Balay if (size == 1) { 34057b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 34157b952d6SSatish Balay } 34257b952d6SSatish Balay else { 34357b952d6SSatish Balay /* assemble the entire matrix onto first processor. */ 34457b952d6SSatish Balay Mat A; 34557b952d6SSatish Balay Mat_SeqBAIJ *Aloc; 34657b952d6SSatish Balay int M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals; 34757b952d6SSatish Balay int mbs=baij->mbs; 34857b952d6SSatish Balay Scalar *a; 34957b952d6SSatish Balay 35057b952d6SSatish Balay if (!rank) { 351*cee3aa6bSSatish Balay ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 35257b952d6SSatish Balay CHKERRQ(ierr); 35357b952d6SSatish Balay } 35457b952d6SSatish Balay else { 355*cee3aa6bSSatish Balay ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 35657b952d6SSatish Balay CHKERRQ(ierr); 35757b952d6SSatish Balay } 35857b952d6SSatish Balay PLogObjectParent(mat,A); 35957b952d6SSatish Balay 36057b952d6SSatish Balay /* copy over the A part */ 36157b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 36257b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 36357b952d6SSatish Balay row = baij->rstart; 36457b952d6SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 36557b952d6SSatish Balay 36657b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 36757b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 36857b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 36957b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 37057b952d6SSatish Balay col = (baij->cstart+aj[j])*bs; 37157b952d6SSatish Balay for (k=0; k<bs; k++ ) { 372*cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 373*cee3aa6bSSatish Balay col++; a += bs; 37457b952d6SSatish Balay } 37557b952d6SSatish Balay } 37657b952d6SSatish Balay } 37757b952d6SSatish Balay /* copy over the B part */ 37857b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 37957b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 38057b952d6SSatish Balay row = baij->rstart*bs; 38157b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 38257b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 38357b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 38457b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 38557b952d6SSatish Balay col = baij->garray[aj[j]]*bs; 38657b952d6SSatish Balay for (k=0; k<bs; k++ ) { 387*cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 388*cee3aa6bSSatish Balay col++; a += bs; 38957b952d6SSatish Balay } 39057b952d6SSatish Balay } 39157b952d6SSatish Balay } 39257b952d6SSatish Balay PetscFree(rvals); 39357b952d6SSatish Balay ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 39457b952d6SSatish Balay ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 39557b952d6SSatish Balay if (!rank) { 39657b952d6SSatish Balay ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 39757b952d6SSatish Balay } 39857b952d6SSatish Balay ierr = MatDestroy(A); CHKERRQ(ierr); 39957b952d6SSatish Balay } 40057b952d6SSatish Balay } 40157b952d6SSatish Balay return 0; 40257b952d6SSatish Balay } 40357b952d6SSatish Balay 40457b952d6SSatish Balay 40557b952d6SSatish Balay 40657b952d6SSatish Balay static int MatView_MPIBAIJ(PetscObject obj,Viewer viewer) 40757b952d6SSatish Balay { 40857b952d6SSatish Balay Mat mat = (Mat) obj; 40957b952d6SSatish Balay int ierr; 41057b952d6SSatish Balay ViewerType vtype; 41157b952d6SSatish Balay 41257b952d6SSatish Balay if (!viewer) { 41357b952d6SSatish Balay viewer = STDOUT_VIEWER_SELF; 41457b952d6SSatish Balay } 41557b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 41657b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 41757b952d6SSatish Balay vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 41857b952d6SSatish Balay ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 41957b952d6SSatish Balay } 42057b952d6SSatish Balay else if (vtype == BINARY_FILE_VIEWER) { 42157b952d6SSatish Balay return MatView_MPIBAIJ_Binary(mat,viewer); 42257b952d6SSatish Balay } 42357b952d6SSatish Balay return 0; 42457b952d6SSatish Balay } 42557b952d6SSatish Balay 42679bdfe76SSatish Balay static int MatDestroy_MPIBAIJ(PetscObject obj) 42779bdfe76SSatish Balay { 42879bdfe76SSatish Balay Mat mat = (Mat) obj; 42979bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 43079bdfe76SSatish Balay int ierr; 43179bdfe76SSatish Balay 43279bdfe76SSatish Balay #if defined(PETSC_LOG) 43379bdfe76SSatish Balay PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N); 43479bdfe76SSatish Balay #endif 43579bdfe76SSatish Balay 43679bdfe76SSatish Balay PetscFree(baij->rowners); 43779bdfe76SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 43879bdfe76SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 43979bdfe76SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 44079bdfe76SSatish Balay if (baij->garray) PetscFree(baij->garray); 44179bdfe76SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 44279bdfe76SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 44379bdfe76SSatish Balay if (baij->rowvalues) PetscFree(baij->rowvalues); 44479bdfe76SSatish Balay PetscFree(baij); 44579bdfe76SSatish Balay PLogObjectDestroy(mat); 44679bdfe76SSatish Balay PetscHeaderDestroy(mat); 44779bdfe76SSatish Balay return 0; 44879bdfe76SSatish Balay } 44979bdfe76SSatish Balay 450*cee3aa6bSSatish Balay static int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 451*cee3aa6bSSatish Balay { 452*cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 453*cee3aa6bSSatish Balay int ierr; 454*cee3aa6bSSatish Balay 455*cee3aa6bSSatish Balay ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr); 456*cee3aa6bSSatish Balay ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 457*cee3aa6bSSatish Balay ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr); 458*cee3aa6bSSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 459*cee3aa6bSSatish Balay return 0; 460*cee3aa6bSSatish Balay } 461*cee3aa6bSSatish Balay 462*cee3aa6bSSatish Balay static int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 463*cee3aa6bSSatish Balay { 464*cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 465*cee3aa6bSSatish Balay int ierr; 466*cee3aa6bSSatish Balay ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 467*cee3aa6bSSatish Balay ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 468*cee3aa6bSSatish Balay ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 469*cee3aa6bSSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 470*cee3aa6bSSatish Balay return 0; 471*cee3aa6bSSatish Balay } 472*cee3aa6bSSatish Balay 473*cee3aa6bSSatish Balay static int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 474*cee3aa6bSSatish Balay { 475*cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 476*cee3aa6bSSatish Balay int ierr; 477*cee3aa6bSSatish Balay 478*cee3aa6bSSatish Balay /* do nondiagonal part */ 479*cee3aa6bSSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 480*cee3aa6bSSatish Balay /* send it on its way */ 481*cee3aa6bSSatish Balay ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES, 482*cee3aa6bSSatish Balay (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 483*cee3aa6bSSatish Balay /* do local part */ 484*cee3aa6bSSatish Balay ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 485*cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 486*cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 487*cee3aa6bSSatish Balay /* but is not perhaps always true. */ 488*cee3aa6bSSatish Balay ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 489*cee3aa6bSSatish Balay (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 490*cee3aa6bSSatish Balay return 0; 491*cee3aa6bSSatish Balay } 492*cee3aa6bSSatish Balay 493*cee3aa6bSSatish Balay static int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 494*cee3aa6bSSatish Balay { 495*cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 496*cee3aa6bSSatish Balay int ierr; 497*cee3aa6bSSatish Balay 498*cee3aa6bSSatish Balay /* do nondiagonal part */ 499*cee3aa6bSSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 500*cee3aa6bSSatish Balay /* send it on its way */ 501*cee3aa6bSSatish Balay ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES, 502*cee3aa6bSSatish Balay (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 503*cee3aa6bSSatish Balay /* do local part */ 504*cee3aa6bSSatish Balay ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 505*cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 506*cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 507*cee3aa6bSSatish Balay /* but is not perhaps always true. */ 508*cee3aa6bSSatish Balay ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 509*cee3aa6bSSatish Balay (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 510*cee3aa6bSSatish Balay return 0; 511*cee3aa6bSSatish Balay } 512*cee3aa6bSSatish Balay 513*cee3aa6bSSatish Balay /* 514*cee3aa6bSSatish Balay This only works correctly for square matrices where the subblock A->A is the 515*cee3aa6bSSatish Balay diagonal block 516*cee3aa6bSSatish Balay */ 517*cee3aa6bSSatish Balay static int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 518*cee3aa6bSSatish Balay { 519*cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 520*cee3aa6bSSatish Balay if (a->M != a->N) 521*cee3aa6bSSatish Balay SETERRQ(1,"MatGetDiagonal_MPIBAIJ:Supports only square matrix where A->A is diag block"); 522*cee3aa6bSSatish Balay return MatGetDiagonal(a->A,v); 523*cee3aa6bSSatish Balay } 524*cee3aa6bSSatish Balay 525*cee3aa6bSSatish Balay static int MatScale_MPIBAIJ(Scalar *aa,Mat A) 526*cee3aa6bSSatish Balay { 527*cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 528*cee3aa6bSSatish Balay int ierr; 529*cee3aa6bSSatish Balay ierr = MatScale(aa,a->A); CHKERRQ(ierr); 530*cee3aa6bSSatish Balay ierr = MatScale(aa,a->B); CHKERRQ(ierr); 531*cee3aa6bSSatish Balay return 0; 532*cee3aa6bSSatish Balay } 53357b952d6SSatish Balay static int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 53457b952d6SSatish Balay { 53557b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 53657b952d6SSatish Balay *m = mat->M; *n = mat->N; 53757b952d6SSatish Balay return 0; 53857b952d6SSatish Balay } 53957b952d6SSatish Balay 54057b952d6SSatish Balay static int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 54157b952d6SSatish Balay { 54257b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 54357b952d6SSatish Balay *m = mat->m; *n = mat->N; 54457b952d6SSatish Balay return 0; 54557b952d6SSatish Balay } 54657b952d6SSatish Balay 54757b952d6SSatish Balay static int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 54857b952d6SSatish Balay { 54957b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 55057b952d6SSatish Balay *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 55157b952d6SSatish Balay return 0; 55257b952d6SSatish Balay } 55357b952d6SSatish Balay 55479bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 55579bdfe76SSatish Balay static struct _MatOps MatOps = { 556*cee3aa6bSSatish Balay MatSetValues_MPIBAIJ,0,0,MatMult_MPIBAIJ, 557*cee3aa6bSSatish Balay MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0, 55879bdfe76SSatish Balay 0,0,0,0, 55979bdfe76SSatish Balay 0,0,0,0, 560*cee3aa6bSSatish Balay 0,MatGetDiagonal_MPIBAIJ,0,0, 56157b952d6SSatish Balay MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,0, 56257b952d6SSatish Balay 0,0,MatGetReordering_MPIBAIJ,0, 56357b952d6SSatish Balay 0,0,0,MatGetSize_MPIBAIJ, 56457b952d6SSatish Balay MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0, 56579bdfe76SSatish Balay 0,0,0,0, 56679bdfe76SSatish Balay 0,0,0,0, 56779bdfe76SSatish Balay 0,0,0,0, 56879bdfe76SSatish Balay 0,0,0,0, 569*cee3aa6bSSatish Balay MatScale_MPIBAIJ,0,0}; 57079bdfe76SSatish Balay 57179bdfe76SSatish Balay 57279bdfe76SSatish Balay /*@C 57379bdfe76SSatish Balay MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 57479bdfe76SSatish Balay (block compressed row). For good matrix assembly performance 57579bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 57679bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 57779bdfe76SSatish Balay performance can be increased by more than a factor of 50. 57879bdfe76SSatish Balay 57979bdfe76SSatish Balay Input Parameters: 58079bdfe76SSatish Balay . comm - MPI communicator 58179bdfe76SSatish Balay . bs - size of blockk 58279bdfe76SSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 58379bdfe76SSatish Balay . n - number of local columns (or PETSC_DECIDE to have calculated 58479bdfe76SSatish Balay if N is given) 58579bdfe76SSatish Balay . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 58679bdfe76SSatish Balay . N - number of global columns (or PETSC_DECIDE to have calculated 58779bdfe76SSatish Balay if n is given) 58879bdfe76SSatish Balay . d_nz - number of block nonzeros per block row in diagonal portion of local 58979bdfe76SSatish Balay submatrix (same for all local rows) 59079bdfe76SSatish Balay . d_nzz - number of block nonzeros per block row in diagonal portion of local 59179bdfe76SSatish Balay submatrix or null (possibly different for each row). You must leave 59279bdfe76SSatish Balay room for the diagonal entry even if it is zero. 59379bdfe76SSatish Balay . o_nz - number of block nonzeros per block row in off-diagonal portion of local 59479bdfe76SSatish Balay submatrix (same for all local rows). 59579bdfe76SSatish Balay . o_nzz - number of block nonzeros per block row in off-diagonal portion of local 59679bdfe76SSatish Balay submatrix or null (possibly different for each row). 59779bdfe76SSatish Balay 59879bdfe76SSatish Balay Output Parameter: 59979bdfe76SSatish Balay . A - the matrix 60079bdfe76SSatish Balay 60179bdfe76SSatish Balay Notes: 60279bdfe76SSatish Balay The user MUST specify either the local or global matrix dimensions 60379bdfe76SSatish Balay (possibly both). 60479bdfe76SSatish Balay 60579bdfe76SSatish Balay Storage Information: 60679bdfe76SSatish Balay For a square global matrix we define each processor's diagonal portion 60779bdfe76SSatish Balay to be its local rows and the corresponding columns (a square submatrix); 60879bdfe76SSatish Balay each processor's off-diagonal portion encompasses the remainder of the 60979bdfe76SSatish Balay local matrix (a rectangular submatrix). 61079bdfe76SSatish Balay 61179bdfe76SSatish Balay The user can specify preallocated storage for the diagonal part of 61279bdfe76SSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 61379bdfe76SSatish Balay d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 61479bdfe76SSatish Balay memory allocation. Likewise, specify preallocated storage for the 61579bdfe76SSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 61679bdfe76SSatish Balay 61779bdfe76SSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 61879bdfe76SSatish Balay the figure below we depict these three local rows and all columns (0-11). 61979bdfe76SSatish Balay 62079bdfe76SSatish Balay $ 0 1 2 3 4 5 6 7 8 9 10 11 62179bdfe76SSatish Balay $ ------------------- 62279bdfe76SSatish Balay $ row 3 | o o o d d d o o o o o o 62379bdfe76SSatish Balay $ row 4 | o o o d d d o o o o o o 62479bdfe76SSatish Balay $ row 5 | o o o d d d o o o o o o 62579bdfe76SSatish Balay $ ------------------- 62679bdfe76SSatish Balay $ 62779bdfe76SSatish Balay 62879bdfe76SSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 62979bdfe76SSatish Balay submatrix, and any entries in the o locations are stored in the 63079bdfe76SSatish Balay o (off-diagonal) submatrix. Note that the d and the o submatrices are 63157b952d6SSatish Balay stored simply in the MATSEQBAIJ format for compressed row storage. 63279bdfe76SSatish Balay 63379bdfe76SSatish Balay Now d_nz should indicate the number of nonzeros per row in the d matrix, 63479bdfe76SSatish Balay and o_nz should indicate the number of nonzeros per row in the o matrix. 63579bdfe76SSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 63679bdfe76SSatish Balay one expects d_nz >> o_nz. For additional details, see the users manual 63779bdfe76SSatish Balay chapter on matrices and the file $(PETSC_DIR)/Performance. 63879bdfe76SSatish Balay 63979bdfe76SSatish Balay .keywords: matrix, aij, compressed row, sparse, parallel 64079bdfe76SSatish Balay 64179bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues() 64279bdfe76SSatish Balay @*/ 64379bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 64479bdfe76SSatish Balay int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 64579bdfe76SSatish Balay { 64679bdfe76SSatish Balay Mat B; 64779bdfe76SSatish Balay Mat_MPIBAIJ *b; 648*cee3aa6bSSatish Balay int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE; 64979bdfe76SSatish Balay 65079bdfe76SSatish Balay if (bs < 1) SETERRQ(1,"MatCreateMPIBAIJ: invalid block size specified"); 65179bdfe76SSatish Balay *A = 0; 65279bdfe76SSatish Balay PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm); 65379bdfe76SSatish Balay PLogObjectCreate(B); 65479bdfe76SSatish Balay B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 65579bdfe76SSatish Balay PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 65679bdfe76SSatish Balay PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 65779bdfe76SSatish Balay B->destroy = MatDestroy_MPIBAIJ; 65879bdfe76SSatish Balay B->view = MatView_MPIBAIJ; 65957b952d6SSatish Balay 66079bdfe76SSatish Balay B->factor = 0; 66179bdfe76SSatish Balay B->assembled = PETSC_FALSE; 66279bdfe76SSatish Balay 66379bdfe76SSatish Balay b->insertmode = NOT_SET_VALUES; 66479bdfe76SSatish Balay MPI_Comm_rank(comm,&b->rank); 66579bdfe76SSatish Balay MPI_Comm_size(comm,&b->size); 66679bdfe76SSatish Balay 66779bdfe76SSatish Balay if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 66857b952d6SSatish Balay SETERRQ(1,"MatCreateMPIBAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 669*cee3aa6bSSatish Balay if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,"MatCreateMPIBAIJ: either M or m should be specified"); 670*cee3aa6bSSatish Balay if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,"MatCreateMPIBAIJ: either N or n should be specified"); 671*cee3aa6bSSatish Balay if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 672*cee3aa6bSSatish Balay if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 67379bdfe76SSatish Balay 67479bdfe76SSatish Balay if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 67579bdfe76SSatish Balay work[0] = m; work[1] = n; 67679bdfe76SSatish Balay mbs = m/bs; nbs = n/bs; 67779bdfe76SSatish Balay MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 67879bdfe76SSatish Balay if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 67979bdfe76SSatish Balay if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 68079bdfe76SSatish Balay } 68179bdfe76SSatish Balay if (m == PETSC_DECIDE) { 68279bdfe76SSatish Balay Mbs = M/bs; 68379bdfe76SSatish Balay if (Mbs*bs != M) SETERRQ(1,"MatCreateMPIBAIJ: No of global rows must be divisible by blocksize"); 68479bdfe76SSatish Balay mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 68579bdfe76SSatish Balay m = mbs*bs; 68679bdfe76SSatish Balay } 68779bdfe76SSatish Balay if (n == PETSC_DECIDE) { 68879bdfe76SSatish Balay Nbs = N/bs; 68979bdfe76SSatish Balay if (Nbs*bs != N) SETERRQ(1,"MatCreateMPIBAIJ: No of global cols must be divisible by blocksize"); 69079bdfe76SSatish Balay nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 69179bdfe76SSatish Balay n = nbs*bs; 69279bdfe76SSatish Balay } 693*cee3aa6bSSatish Balay if (mbs*bs != m || nbs*bs != n) SETERRQ(1,"MatCreateMPIBAIJ: No of local rows, cols must be divisible by blocksize"); 69479bdfe76SSatish Balay 69579bdfe76SSatish Balay b->m = m; B->m = m; 69679bdfe76SSatish Balay b->n = n; B->n = n; 69779bdfe76SSatish Balay b->N = N; B->N = N; 69879bdfe76SSatish Balay b->M = M; B->M = M; 69979bdfe76SSatish Balay b->bs = bs; 70079bdfe76SSatish Balay b->bs2 = bs*bs; 70179bdfe76SSatish Balay b->mbs = mbs; 70279bdfe76SSatish Balay b->nbs = nbs; 70379bdfe76SSatish Balay b->Mbs = Mbs; 70479bdfe76SSatish Balay b->Nbs = Nbs; 70579bdfe76SSatish Balay 70679bdfe76SSatish Balay /* build local table of row and column ownerships */ 70779bdfe76SSatish Balay b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 70879bdfe76SSatish Balay PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ)); 70979bdfe76SSatish Balay b->cowners = b->rowners + b->size + 1; 71079bdfe76SSatish Balay MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 71179bdfe76SSatish Balay b->rowners[0] = 0; 71279bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 71379bdfe76SSatish Balay b->rowners[i] += b->rowners[i-1]; 71479bdfe76SSatish Balay } 71579bdfe76SSatish Balay b->rstart = b->rowners[b->rank]; 71679bdfe76SSatish Balay b->rend = b->rowners[b->rank+1]; 71779bdfe76SSatish Balay MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 71879bdfe76SSatish Balay b->cowners[0] = 0; 71979bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 72079bdfe76SSatish Balay b->cowners[i] += b->cowners[i-1]; 72179bdfe76SSatish Balay } 72279bdfe76SSatish Balay b->cstart = b->cowners[b->rank]; 72379bdfe76SSatish Balay b->cend = b->cowners[b->rank+1]; 72479bdfe76SSatish Balay 72579bdfe76SSatish Balay if (d_nz == PETSC_DEFAULT) d_nz = 5; 72679bdfe76SSatish Balay ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 72779bdfe76SSatish Balay PLogObjectParent(B,b->A); 72879bdfe76SSatish Balay if (o_nz == PETSC_DEFAULT) o_nz = 0; 72979bdfe76SSatish Balay ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 73079bdfe76SSatish Balay PLogObjectParent(B,b->B); 73179bdfe76SSatish Balay 73279bdfe76SSatish Balay /* build cache for off array entries formed */ 73379bdfe76SSatish Balay ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 73479bdfe76SSatish Balay b->colmap = 0; 73579bdfe76SSatish Balay b->garray = 0; 73679bdfe76SSatish Balay b->roworiented = 1; 73779bdfe76SSatish Balay 73879bdfe76SSatish Balay /* stuff used for matrix vector multiply */ 73979bdfe76SSatish Balay b->lvec = 0; 74079bdfe76SSatish Balay b->Mvctx = 0; 74179bdfe76SSatish Balay 74279bdfe76SSatish Balay /* stuff for MatGetRow() */ 74379bdfe76SSatish Balay b->rowindices = 0; 74479bdfe76SSatish Balay b->rowvalues = 0; 74579bdfe76SSatish Balay b->getrowactive = PETSC_FALSE; 74679bdfe76SSatish Balay 74779bdfe76SSatish Balay *A = B; 74879bdfe76SSatish Balay return 0; 74979bdfe76SSatish Balay } 75057b952d6SSatish Balay 75157b952d6SSatish Balay #include "sys.h" 75257b952d6SSatish Balay 75357b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 75457b952d6SSatish Balay { 75557b952d6SSatish Balay Mat A; 75657b952d6SSatish Balay int i, nz, ierr, j,rstart, rend, fd; 75757b952d6SSatish Balay Scalar *vals,*buf; 75857b952d6SSatish Balay MPI_Comm comm = ((PetscObject)viewer)->comm; 75957b952d6SSatish Balay MPI_Status status; 760*cee3aa6bSSatish Balay int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 76157b952d6SSatish Balay int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 76257b952d6SSatish Balay int flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows; 76357b952d6SSatish Balay int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 76457b952d6SSatish Balay int dcount,kmax,k,nzcount,tmp; 76557b952d6SSatish Balay 76657b952d6SSatish Balay 76757b952d6SSatish Balay ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 76857b952d6SSatish Balay bs2 = bs*bs; 76957b952d6SSatish Balay 77057b952d6SSatish Balay MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 77157b952d6SSatish Balay if (!rank) { 77257b952d6SSatish Balay ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 77357b952d6SSatish Balay ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 774*cee3aa6bSSatish Balay if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIBAIJ:not matrix object"); 77557b952d6SSatish Balay } 77657b952d6SSatish Balay 77757b952d6SSatish Balay MPI_Bcast(header+1,3,MPI_INT,0,comm); 77857b952d6SSatish Balay M = header[1]; N = header[2]; 77957b952d6SSatish Balay 78057b952d6SSatish Balay 78157b952d6SSatish Balay if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 78257b952d6SSatish Balay 78357b952d6SSatish Balay /* 78457b952d6SSatish Balay This code adds extra rows to make sure the number of rows is 78557b952d6SSatish Balay divisible by the blocksize 78657b952d6SSatish Balay */ 78757b952d6SSatish Balay Mbs = M/bs; 78857b952d6SSatish Balay extra_rows = bs - M + bs*(Mbs); 78957b952d6SSatish Balay if (extra_rows == bs) extra_rows = 0; 79057b952d6SSatish Balay else Mbs++; 79157b952d6SSatish Balay if (extra_rows &&!rank) { 79257b952d6SSatish Balay PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize"); 79357b952d6SSatish Balay } 79457b952d6SSatish Balay /* determine ownership of all rows */ 79557b952d6SSatish Balay mbs = Mbs/size + ((Mbs % size) > rank); 79657b952d6SSatish Balay m = mbs * bs; 797*cee3aa6bSSatish Balay rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 798*cee3aa6bSSatish Balay browners = rowners + size + 1; 79957b952d6SSatish Balay MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm); 80057b952d6SSatish Balay rowners[0] = 0; 801*cee3aa6bSSatish Balay for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 802*cee3aa6bSSatish Balay for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 80357b952d6SSatish Balay rstart = rowners[rank]; 80457b952d6SSatish Balay rend = rowners[rank+1]; 80557b952d6SSatish Balay 80657b952d6SSatish Balay /* distribute row lengths to all processors */ 80757b952d6SSatish Balay locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 80857b952d6SSatish Balay if (!rank) { 80957b952d6SSatish Balay rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 81057b952d6SSatish Balay ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 81157b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 81257b952d6SSatish Balay sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 813*cee3aa6bSSatish Balay for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 814*cee3aa6bSSatish Balay MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm); 81557b952d6SSatish Balay PetscFree(sndcounts); 81657b952d6SSatish Balay } 81757b952d6SSatish Balay else { 81857b952d6SSatish Balay MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm); 81957b952d6SSatish Balay } 82057b952d6SSatish Balay 82157b952d6SSatish Balay if (!rank) { 82257b952d6SSatish Balay /* calculate the number of nonzeros on each processor */ 82357b952d6SSatish Balay procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 82457b952d6SSatish Balay PetscMemzero(procsnz,size*sizeof(int)); 82557b952d6SSatish Balay for ( i=0; i<size; i++ ) { 82657b952d6SSatish Balay for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 82757b952d6SSatish Balay procsnz[i] += rowlengths[j]; 82857b952d6SSatish Balay } 82957b952d6SSatish Balay } 83057b952d6SSatish Balay PetscFree(rowlengths); 83157b952d6SSatish Balay 83257b952d6SSatish Balay /* determine max buffer needed and allocate it */ 83357b952d6SSatish Balay maxnz = 0; 83457b952d6SSatish Balay for ( i=0; i<size; i++ ) { 83557b952d6SSatish Balay maxnz = PetscMax(maxnz,procsnz[i]); 83657b952d6SSatish Balay } 83757b952d6SSatish Balay cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 83857b952d6SSatish Balay 83957b952d6SSatish Balay /* read in my part of the matrix column indices */ 84057b952d6SSatish Balay nz = procsnz[0]; 84157b952d6SSatish Balay ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 84257b952d6SSatish Balay mycols = ibuf; 843*cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 84457b952d6SSatish Balay ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 845*cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 846*cee3aa6bSSatish Balay 84757b952d6SSatish Balay /* read in every ones (except the last) and ship off */ 84857b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 84957b952d6SSatish Balay nz = procsnz[i]; 85057b952d6SSatish Balay ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 85157b952d6SSatish Balay MPI_Send(cols,nz,MPI_INT,i,tag,comm); 85257b952d6SSatish Balay } 85357b952d6SSatish Balay /* read in the stuff for the last proc */ 85457b952d6SSatish Balay if ( size != 1 ) { 85557b952d6SSatish Balay nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 85657b952d6SSatish Balay ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 85757b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 858*cee3aa6bSSatish Balay MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm); 85957b952d6SSatish Balay } 86057b952d6SSatish Balay PetscFree(cols); 86157b952d6SSatish Balay } 86257b952d6SSatish Balay else { 86357b952d6SSatish Balay /* determine buffer space needed for message */ 86457b952d6SSatish Balay nz = 0; 86557b952d6SSatish Balay for ( i=0; i<m; i++ ) { 86657b952d6SSatish Balay nz += locrowlens[i]; 86757b952d6SSatish Balay } 86857b952d6SSatish Balay ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 86957b952d6SSatish Balay mycols = ibuf; 87057b952d6SSatish Balay /* receive message of column indices*/ 87157b952d6SSatish Balay MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 87257b952d6SSatish Balay MPI_Get_count(&status,MPI_INT,&maxnz); 873*cee3aa6bSSatish Balay if (maxnz != nz) SETERRQ(1,"MatLoad_MPIBAIJ:something is wrong with file"); 87457b952d6SSatish Balay } 87557b952d6SSatish Balay 87657b952d6SSatish Balay /* loop over local rows, determining number of off diagonal entries */ 877*cee3aa6bSSatish Balay dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 878*cee3aa6bSSatish Balay odlens = dlens + (rend-rstart); 87957b952d6SSatish Balay mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 880*cee3aa6bSSatish Balay PetscMemzero(mask,3*Mbs*sizeof(int)); 88157b952d6SSatish Balay masked1 = mask + Mbs; 88257b952d6SSatish Balay masked2 = masked1 + Mbs; 88357b952d6SSatish Balay rowcount = 0; nzcount = 0; 88457b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 88557b952d6SSatish Balay dcount = 0; 88657b952d6SSatish Balay odcount = 0; 88757b952d6SSatish Balay for ( j=0; j<bs; j++ ) { 88857b952d6SSatish Balay kmax = locrowlens[rowcount]; 88957b952d6SSatish Balay for ( k=0; k<kmax; k++ ) { 89057b952d6SSatish Balay tmp = mycols[nzcount++]/bs; 89157b952d6SSatish Balay if (!mask[tmp]) { 89257b952d6SSatish Balay mask[tmp] = 1; 89357b952d6SSatish Balay if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 89457b952d6SSatish Balay else masked1[dcount++] = tmp; 89557b952d6SSatish Balay } 89657b952d6SSatish Balay } 89757b952d6SSatish Balay rowcount++; 89857b952d6SSatish Balay } 899*cee3aa6bSSatish Balay 90057b952d6SSatish Balay dlens[i] = dcount; 90157b952d6SSatish Balay odlens[i] = odcount; 902*cee3aa6bSSatish Balay 90357b952d6SSatish Balay /* zero out the mask elements we set */ 90457b952d6SSatish Balay for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 90557b952d6SSatish Balay for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 90657b952d6SSatish Balay } 907*cee3aa6bSSatish Balay 90857b952d6SSatish Balay /* create our matrix */ 90957b952d6SSatish Balay ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr); 91057b952d6SSatish Balay A = *newmat; 91157b952d6SSatish Balay MatSetOption(A,COLUMNS_SORTED); 91257b952d6SSatish Balay 91357b952d6SSatish Balay if (!rank) { 91457b952d6SSatish Balay buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 91557b952d6SSatish Balay /* read in my part of the matrix numerical values */ 91657b952d6SSatish Balay nz = procsnz[0]; 91757b952d6SSatish Balay vals = buf; 918*cee3aa6bSSatish Balay mycols = ibuf; 919*cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 92057b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 921*cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 92257b952d6SSatish Balay /* insert into matrix */ 92357b952d6SSatish Balay jj = rstart*bs; 92457b952d6SSatish Balay for ( i=0; i<m; i++ ) { 92557b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 92657b952d6SSatish Balay mycols += locrowlens[i]; 92757b952d6SSatish Balay vals += locrowlens[i]; 92857b952d6SSatish Balay jj++; 92957b952d6SSatish Balay } 93057b952d6SSatish Balay /* read in other processors( except the last one) and ship out */ 93157b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 93257b952d6SSatish Balay nz = procsnz[i]; 93357b952d6SSatish Balay vals = buf; 93457b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 93557b952d6SSatish Balay MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 93657b952d6SSatish Balay } 93757b952d6SSatish Balay /* the last proc */ 93857b952d6SSatish Balay if ( size != 1 ){ 93957b952d6SSatish Balay nz = procsnz[i] - extra_rows; 940*cee3aa6bSSatish Balay vals = buf; 94157b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 94257b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 943*cee3aa6bSSatish Balay MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm); 94457b952d6SSatish Balay } 94557b952d6SSatish Balay PetscFree(procsnz); 94657b952d6SSatish Balay } 94757b952d6SSatish Balay else { 94857b952d6SSatish Balay /* receive numeric values */ 94957b952d6SSatish Balay buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 95057b952d6SSatish Balay 95157b952d6SSatish Balay /* receive message of values*/ 95257b952d6SSatish Balay vals = buf; 953*cee3aa6bSSatish Balay mycols = ibuf; 95457b952d6SSatish Balay MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 95557b952d6SSatish Balay MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 956*cee3aa6bSSatish Balay if (maxnz != nz) SETERRQ(1,"MatLoad_MPIBAIJ:something is wrong with file"); 95757b952d6SSatish Balay 95857b952d6SSatish Balay /* insert into matrix */ 95957b952d6SSatish Balay jj = rstart*bs; 960*cee3aa6bSSatish Balay for ( i=0; i<m; i++ ) { 96157b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 96257b952d6SSatish Balay mycols += locrowlens[i]; 96357b952d6SSatish Balay vals += locrowlens[i]; 96457b952d6SSatish Balay jj++; 96557b952d6SSatish Balay } 96657b952d6SSatish Balay } 96757b952d6SSatish Balay PetscFree(locrowlens); 96857b952d6SSatish Balay PetscFree(buf); 96957b952d6SSatish Balay PetscFree(ibuf); 97057b952d6SSatish Balay PetscFree(rowners); 97157b952d6SSatish Balay PetscFree(dlens); 972*cee3aa6bSSatish Balay PetscFree(mask); 97357b952d6SSatish Balay ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 97457b952d6SSatish Balay ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 97557b952d6SSatish Balay return 0; 97657b952d6SSatish Balay } 97757b952d6SSatish Balay 97857b952d6SSatish Balay 979