1*a84739b8SRichard Tran Mills #include <../src/mat/impls/aij/mpi/mpiaij.h> 2*a84739b8SRichard Tran Mills #undef __FUNCT__ 3*a84739b8SRichard Tran Mills #define __FUNCT__ "MatCreateMPIAIJMKL" 4*a84739b8SRichard Tran Mills /*@C 5*a84739b8SRichard Tran Mills MatCreateMPIAIJMKL - Creates a sparse parallel matrix whose local 6*a84739b8SRichard Tran Mills portions are stored as SEQAIJMKL matrices (a matrix class that inherits 7*a84739b8SRichard Tran Mills from SEQAIJ but uses some operations provided by Intel MKL). The same 8*a84739b8SRichard Tran Mills guidelines that apply to MPIAIJ matrices for preallocating the matrix 9*a84739b8SRichard Tran Mills storage apply here as well. 10*a84739b8SRichard Tran Mills 11*a84739b8SRichard Tran Mills Collective on MPI_Comm 12*a84739b8SRichard Tran Mills 13*a84739b8SRichard Tran Mills Input Parameters: 14*a84739b8SRichard Tran Mills + comm - MPI communicator 15*a84739b8SRichard Tran Mills . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 16*a84739b8SRichard Tran Mills This value should be the same as the local size used in creating the 17*a84739b8SRichard Tran Mills y vector for the matrix-vector product y = Ax. 18*a84739b8SRichard Tran Mills . n - This value should be the same as the local size used in creating the 19*a84739b8SRichard Tran Mills x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 20*a84739b8SRichard Tran Mills calculated if N is given) For square matrices n is almost always m. 21*a84739b8SRichard Tran Mills . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 22*a84739b8SRichard Tran Mills . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 23*a84739b8SRichard Tran Mills . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 24*a84739b8SRichard Tran Mills (same value is used for all local rows) 25*a84739b8SRichard Tran Mills . d_nnz - array containing the number of nonzeros in the various rows of the 26*a84739b8SRichard Tran Mills DIAGONAL portion of the local submatrix (possibly different for each row) 27*a84739b8SRichard Tran Mills or NULL, if d_nz is used to specify the nonzero structure. 28*a84739b8SRichard Tran Mills The size of this array is equal to the number of local rows, i.e 'm'. 29*a84739b8SRichard Tran Mills For matrices you plan to factor you must leave room for the diagonal entry and 30*a84739b8SRichard Tran Mills put in the entry even if it is zero. 31*a84739b8SRichard Tran Mills . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 32*a84739b8SRichard Tran Mills submatrix (same value is used for all local rows). 33*a84739b8SRichard Tran Mills - o_nnz - array containing the number of nonzeros in the various rows of the 34*a84739b8SRichard Tran Mills OFF-DIAGONAL portion of the local submatrix (possibly different for 35*a84739b8SRichard Tran Mills each row) or NULL, if o_nz is used to specify the nonzero 36*a84739b8SRichard Tran Mills structure. The size of this array is equal to the number 37*a84739b8SRichard Tran Mills of local rows, i.e 'm'. 38*a84739b8SRichard Tran Mills 39*a84739b8SRichard Tran Mills Output Parameter: 40*a84739b8SRichard Tran Mills . A - the matrix 41*a84739b8SRichard Tran Mills 42*a84739b8SRichard Tran Mills Notes: 43*a84739b8SRichard Tran Mills If the *_nnz parameter is given then the *_nz parameter is ignored 44*a84739b8SRichard Tran Mills 45*a84739b8SRichard Tran Mills m,n,M,N parameters specify the size of the matrix, and its partitioning across 46*a84739b8SRichard Tran Mills processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 47*a84739b8SRichard Tran Mills storage requirements for this matrix. 48*a84739b8SRichard Tran Mills 49*a84739b8SRichard Tran Mills If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 50*a84739b8SRichard Tran Mills processor than it must be used on all processors that share the object for 51*a84739b8SRichard Tran Mills that argument. 52*a84739b8SRichard Tran Mills 53*a84739b8SRichard Tran Mills The user MUST specify either the local or global matrix dimensions 54*a84739b8SRichard Tran Mills (possibly both). 55*a84739b8SRichard Tran Mills 56*a84739b8SRichard Tran Mills The parallel matrix is partitioned such that the first m0 rows belong to 57*a84739b8SRichard Tran Mills process 0, the next m1 rows belong to process 1, the next m2 rows belong 58*a84739b8SRichard Tran Mills to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 59*a84739b8SRichard Tran Mills 60*a84739b8SRichard Tran Mills The DIAGONAL portion of the local submatrix of a processor can be defined 61*a84739b8SRichard Tran Mills as the submatrix which is obtained by extraction the part corresponding 62*a84739b8SRichard Tran Mills to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 63*a84739b8SRichard Tran Mills first row that belongs to the processor, and r2 is the last row belonging 64*a84739b8SRichard Tran Mills to the this processor. This is a square mxm matrix. The remaining portion 65*a84739b8SRichard Tran Mills of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 66*a84739b8SRichard Tran Mills 67*a84739b8SRichard Tran Mills If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 68*a84739b8SRichard Tran Mills 69*a84739b8SRichard Tran Mills When calling this routine with a single process communicator, a matrix of 70*a84739b8SRichard Tran Mills type SEQAIJMKL is returned. If a matrix of type MPIAIJMKL is desired 71*a84739b8SRichard Tran Mills for this type of communicator, use the construction mechanism: 72*a84739b8SRichard Tran Mills MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 73*a84739b8SRichard Tran Mills 74*a84739b8SRichard Tran Mills By default, this format uses inodes (identical nodes) when possible. 75*a84739b8SRichard Tran Mills We search for consecutive rows with the same nonzero structure, thereby 76*a84739b8SRichard Tran Mills reusing matrix information to achieve increased efficiency. 77*a84739b8SRichard Tran Mills 78*a84739b8SRichard Tran Mills Options Database Keys: 79*a84739b8SRichard Tran Mills + -mat_no_inode - Do not use inodes 80*a84739b8SRichard Tran Mills . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 81*a84739b8SRichard Tran Mills - -mat_aij_oneindex - Internally use indexing starting at 1 82*a84739b8SRichard Tran Mills rather than 0. Note that when calling MatSetValues(), 83*a84739b8SRichard Tran Mills the user still MUST index entries starting at 0! 84*a84739b8SRichard Tran Mills 85*a84739b8SRichard Tran Mills Level: intermediate 86*a84739b8SRichard Tran Mills 87*a84739b8SRichard Tran Mills .keywords: matrix, cray, sparse, parallel 88*a84739b8SRichard Tran Mills 89*a84739b8SRichard Tran Mills .seealso: MatCreate(), MatCreateSeqAIJMKL(), MatSetValues() 90*a84739b8SRichard Tran Mills @*/ 91*a84739b8SRichard Tran Mills PetscErrorCode MatCreateMPIAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 92*a84739b8SRichard Tran Mills { 93*a84739b8SRichard Tran Mills PetscErrorCode ierr; 94*a84739b8SRichard Tran Mills PetscMPIInt size; 95*a84739b8SRichard Tran Mills 96*a84739b8SRichard Tran Mills PetscFunctionBegin; 97*a84739b8SRichard Tran Mills ierr = MatCreate(comm,A);CHKERRQ(ierr); 98*a84739b8SRichard Tran Mills ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 99*a84739b8SRichard Tran Mills ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 100*a84739b8SRichard Tran Mills if (size > 1) { 101*a84739b8SRichard Tran Mills ierr = MatSetType(*A,MATMPIAIJMKL);CHKERRQ(ierr); 102*a84739b8SRichard Tran Mills ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 103*a84739b8SRichard Tran Mills } else { 104*a84739b8SRichard Tran Mills ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 105*a84739b8SRichard Tran Mills ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 106*a84739b8SRichard Tran Mills } 107*a84739b8SRichard Tran Mills PetscFunctionReturn(0); 108*a84739b8SRichard Tran Mills } 109*a84739b8SRichard Tran Mills 110*a84739b8SRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat,MatType,MatReuse,Mat*); 111*a84739b8SRichard Tran Mills 112*a84739b8SRichard Tran Mills #undef __FUNCT__ 113*a84739b8SRichard Tran Mills #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJMKL" 114*a84739b8SRichard Tran Mills PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJMKL(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 115*a84739b8SRichard Tran Mills { 116*a84739b8SRichard Tran Mills Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 117*a84739b8SRichard Tran Mills PetscErrorCode ierr; 118*a84739b8SRichard Tran Mills 119*a84739b8SRichard Tran Mills PetscFunctionBegin; 120*a84739b8SRichard Tran Mills ierr = MatMPIAIJSetPreallocation_MPIAIJ(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 121*a84739b8SRichard Tran Mills ierr = MatConvert_SeqAIJ_SeqAIJMKL(b->A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &b->A);CHKERRQ(ierr); 122*a84739b8SRichard Tran Mills ierr = MatConvert_SeqAIJ_SeqAIJMKL(b->B, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &b->B);CHKERRQ(ierr); 123*a84739b8SRichard Tran Mills PetscFunctionReturn(0); 124*a84739b8SRichard Tran Mills } 125*a84739b8SRichard Tran Mills 126*a84739b8SRichard Tran Mills #undef __FUNCT__ 127*a84739b8SRichard Tran Mills #define __FUNCT__ "MatConvert_MPIAIJ_MPIAIJMKL" 128*a84739b8SRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 129*a84739b8SRichard Tran Mills { 130*a84739b8SRichard Tran Mills PetscErrorCode ierr; 131*a84739b8SRichard Tran Mills Mat B = *newmat; 132*a84739b8SRichard Tran Mills 133*a84739b8SRichard Tran Mills PetscFunctionBegin; 134*a84739b8SRichard Tran Mills if (reuse == MAT_INITIAL_MATRIX) { 135*a84739b8SRichard Tran Mills ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 136*a84739b8SRichard Tran Mills } 137*a84739b8SRichard Tran Mills 138*a84739b8SRichard Tran Mills ierr = PetscObjectChangeTypeName((PetscObject) B, MATMPIAIJMKL);CHKERRQ(ierr); 139*a84739b8SRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJMKL);CHKERRQ(ierr); 140*a84739b8SRichard Tran Mills *newmat = B; 141*a84739b8SRichard Tran Mills PetscFunctionReturn(0); 142*a84739b8SRichard Tran Mills } 143*a84739b8SRichard Tran Mills 144*a84739b8SRichard Tran Mills #undef __FUNCT__ 145*a84739b8SRichard Tran Mills #define __FUNCT__ "MatCreate_MPIAIJMKL" 146*a84739b8SRichard Tran Mills PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJMKL(Mat A) 147*a84739b8SRichard Tran Mills { 148*a84739b8SRichard Tran Mills PetscErrorCode ierr; 149*a84739b8SRichard Tran Mills 150*a84739b8SRichard Tran Mills PetscFunctionBegin; 151*a84739b8SRichard Tran Mills ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 152*a84739b8SRichard Tran Mills ierr = MatConvert_MPIAIJ_MPIAIJMKL(A,MATMPIAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 153*a84739b8SRichard Tran Mills PetscFunctionReturn(0); 154*a84739b8SRichard Tran Mills } 155*a84739b8SRichard Tran Mills 156*a84739b8SRichard Tran Mills /*MC 157*a84739b8SRichard Tran Mills MATAIJMKL - MATAIJMKL = "AIJMKL" - A matrix type to be used for sparse matrices. 158*a84739b8SRichard Tran Mills 159*a84739b8SRichard Tran Mills This matrix type is identical to MATSEQAIJMKL when constructed with a single process communicator, 160*a84739b8SRichard Tran Mills and MATMPIAIJMKL otherwise. As a result, for single process communicators, 161*a84739b8SRichard Tran Mills MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 162*a84739b8SRichard Tran Mills for communicators controlling multiple processes. It is recommended that you call both of 163*a84739b8SRichard Tran Mills the above preallocation routines for simplicity. 164*a84739b8SRichard Tran Mills 165*a84739b8SRichard Tran Mills Options Database Keys: 166*a84739b8SRichard Tran Mills . -mat_type aijmkl - sets the matrix type to "AIJMKL" during a call to MatSetFromOptions() 167*a84739b8SRichard Tran Mills 168*a84739b8SRichard Tran Mills Level: beginner 169*a84739b8SRichard Tran Mills 170*a84739b8SRichard Tran Mills .seealso: MatCreateMPIAIJMKL(), MATSEQAIJMKL, MATMPIAIJMKL 171*a84739b8SRichard Tran Mills M*/ 172*a84739b8SRichard Tran Mills 173