1b2f1ab58SBarry Smith #include "petsc.h" 2b2f1ab58SBarry Smith #include "../src/mat/impls/aij/seq/aij.h" 3b2f1ab58SBarry Smith #include "spbas.h" 4b2f1ab58SBarry Smith 59ccc4a9bSBarry Smith 6b2f1ab58SBarry Smith /* 7b2f1ab58SBarry Smith spbas_memory_requirement: 8b2f1ab58SBarry Smith Calculate the number of bytes needed to store tha matrix 9b2f1ab58SBarry Smith */ 10b2f1ab58SBarry Smith #undef __FUNCT__ 11b2f1ab58SBarry Smith #define __FUNCT__ "spbas_memory_requirement" 12b2f1ab58SBarry Smith long int spbas_memory_requirement( spbas_matrix matrix) 13b2f1ab58SBarry Smith { 14c328eeadSBarry Smith long int memreq = 6 * sizeof(PetscInt) + /* nrows, ncols, nnz, n_alloc_icol, 15c328eeadSBarry Smith n_alloc_val, col_idx_type */ 16c328eeadSBarry Smith sizeof(PetscTruth) + /* block_data */ 17c328eeadSBarry Smith sizeof(PetscScalar**) + /* values */ 18c328eeadSBarry Smith sizeof(PetscScalar*) + /* alloc_val */ 19c328eeadSBarry Smith 2 * sizeof(int**) + /* icols, icols0 */ 20c328eeadSBarry Smith 2 * sizeof(PetscInt*) + /* row_nnz, alloc_icol */ 21c328eeadSBarry Smith matrix.nrows * sizeof(PetscInt) + /* row_nnz[*] */ 22c328eeadSBarry Smith matrix.nrows * sizeof(PetscInt*); /* icols[*] */ 23b2f1ab58SBarry Smith 24c328eeadSBarry Smith /* icol0[*] */ 25b2f1ab58SBarry Smith if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) 26b2f1ab58SBarry Smith { memreq += matrix.nrows * sizeof(PetscInt); } 27b2f1ab58SBarry Smith 28c328eeadSBarry Smith /* icols[*][*] */ 29b2f1ab58SBarry Smith if (matrix.block_data) 30b2f1ab58SBarry Smith { memreq += matrix.n_alloc_icol * sizeof(PetscInt); } 31b2f1ab58SBarry Smith else 32b2f1ab58SBarry Smith { memreq += matrix.nnz * sizeof(PetscInt); } 33b2f1ab58SBarry Smith 34b2f1ab58SBarry Smith if (matrix.values) 35b2f1ab58SBarry Smith { 36c328eeadSBarry Smith memreq += matrix.nrows * sizeof(PetscScalar*); /* values[*] */ 37c328eeadSBarry Smith /* values[*][*] */ 38b2f1ab58SBarry Smith if (matrix.block_data) 39b2f1ab58SBarry Smith { memreq += matrix.n_alloc_val * sizeof(PetscScalar); } 40b2f1ab58SBarry Smith else 41b2f1ab58SBarry Smith { memreq += matrix.nnz * sizeof(PetscScalar); } 42b2f1ab58SBarry Smith } 43b2f1ab58SBarry Smith 44b2f1ab58SBarry Smith return memreq; 45b2f1ab58SBarry Smith } 46b2f1ab58SBarry Smith 47b2f1ab58SBarry Smith /* 48b2f1ab58SBarry Smith spbas_allocate_pattern: 49b2f1ab58SBarry Smith allocate the pattern arrays row_nnz, icols and optionally values 50b2f1ab58SBarry Smith */ 51b2f1ab58SBarry Smith #undef __FUNCT__ 52b2f1ab58SBarry Smith #define __FUNCT__ "spbas_allocate_pattern" 53b2f1ab58SBarry Smith PetscErrorCode spbas_allocate_pattern( spbas_matrix * result, PetscTruth do_values) 54b2f1ab58SBarry Smith { 55b2f1ab58SBarry Smith PetscErrorCode ierr; 56b2f1ab58SBarry Smith PetscInt nrows = result->nrows; 57b2f1ab58SBarry Smith PetscInt col_idx_type = result->col_idx_type; 58b2f1ab58SBarry Smith 59b2f1ab58SBarry Smith PetscFunctionBegin; 60c328eeadSBarry Smith /* Allocate sparseness pattern */ 61b2f1ab58SBarry Smith ierr = PetscMalloc(nrows*sizeof(PetscInt),&result->row_nnz);CHKERRQ(ierr); 62b2f1ab58SBarry Smith ierr = PetscMalloc(nrows*sizeof(PetscInt*),&result->icols);CHKERRQ(ierr); 63b2f1ab58SBarry Smith 64c328eeadSBarry Smith /* If offsets are given wrt an array, create array */ 65b2f1ab58SBarry Smith if (col_idx_type == SPBAS_OFFSET_ARRAY) 66b2f1ab58SBarry Smith { 67b2f1ab58SBarry Smith ierr = PetscMalloc(nrows*sizeof(PetscInt),&result->icol0);CHKERRQ(ierr); 68b2f1ab58SBarry Smith } 69b2f1ab58SBarry Smith else 70b2f1ab58SBarry Smith { 71c328eeadSBarry Smith result->icol0 = PETSC_NULL; 72b2f1ab58SBarry Smith } 73b2f1ab58SBarry Smith 74c328eeadSBarry Smith /* If values are given, allocate values array */ 75b2f1ab58SBarry Smith if (do_values) 76b2f1ab58SBarry Smith { 77c328eeadSBarry Smith ierr = PetscMalloc(nrows*sizeof(PetscScalar*),&result->values);CHKERRQ(ierr); 78b2f1ab58SBarry Smith } 79b2f1ab58SBarry Smith else 80b2f1ab58SBarry Smith { 81c328eeadSBarry Smith result->values = PETSC_NULL; 82b2f1ab58SBarry Smith } 83b2f1ab58SBarry Smith 84b2f1ab58SBarry Smith PetscFunctionReturn(0); 85b2f1ab58SBarry Smith } 86b2f1ab58SBarry Smith 87b2f1ab58SBarry Smith /* 88b2f1ab58SBarry Smith spbas_allocate_data: 89b2f1ab58SBarry Smith in case of block_data: 90b2f1ab58SBarry Smith Allocate the data arrays alloc_icol and optionally alloc_val, 91b2f1ab58SBarry Smith set appropriate pointers from icols and values; 92b2f1ab58SBarry Smith in case of !block_data: 93b2f1ab58SBarry Smith Allocate the arrays icols[i] and optionally values[i] 94b2f1ab58SBarry Smith */ 95b2f1ab58SBarry Smith #undef __FUNCT__ 96b2f1ab58SBarry Smith #define __FUNCT__ "spbas_allocate_data" 97b2f1ab58SBarry Smith PetscErrorCode spbas_allocate_data( spbas_matrix * result) 98b2f1ab58SBarry Smith { 99b2f1ab58SBarry Smith PetscInt i; 100b2f1ab58SBarry Smith PetscInt nnz = result->nnz; 101b2f1ab58SBarry Smith PetscInt nrows = result->nrows; 102b2f1ab58SBarry Smith PetscInt r_nnz; 103b2f1ab58SBarry Smith PetscErrorCode ierr; 104c328eeadSBarry Smith PetscTruth do_values = (result->values != PETSC_NULL) ? PETSC_TRUE : PETSC_FALSE; 105b2f1ab58SBarry Smith PetscTruth block_data = result->block_data; 106b2f1ab58SBarry Smith 107b2f1ab58SBarry Smith PetscFunctionBegin; 108b2f1ab58SBarry Smith if (block_data) 109b2f1ab58SBarry Smith { 110c328eeadSBarry Smith /* Allocate the column number array and point to it */ 111b2f1ab58SBarry Smith result->n_alloc_icol = nnz; 112c328eeadSBarry Smith ierr = PetscMalloc(nnz*sizeof(PetscInt), &result->alloc_icol);CHKERRQ(ierr); 113b2f1ab58SBarry Smith result->icols[0]= result->alloc_icol; 114b2f1ab58SBarry Smith for (i=1; i<nrows; i++) 115b2f1ab58SBarry Smith { 116b2f1ab58SBarry Smith result->icols[i] = result->icols[i-1] + result->row_nnz[i-1]; 117b2f1ab58SBarry Smith } 118b2f1ab58SBarry Smith 119c328eeadSBarry Smith /* Allocate the value array and point to it */ 120b2f1ab58SBarry Smith if (do_values) 121b2f1ab58SBarry Smith { 122b2f1ab58SBarry Smith result->n_alloc_val= nnz; 123c328eeadSBarry Smith ierr = PetscMalloc(nnz*sizeof(PetscScalar), &result->alloc_val);CHKERRQ(ierr); 124b2f1ab58SBarry Smith result->values[0]= result->alloc_val; 125b2f1ab58SBarry Smith for (i=1; i<nrows; i++) 126b2f1ab58SBarry Smith { 127b2f1ab58SBarry Smith result->values[i] = result->values[i-1] + result->row_nnz[i-1]; 128b2f1ab58SBarry Smith } 129b2f1ab58SBarry Smith } 130b2f1ab58SBarry Smith } 131b2f1ab58SBarry Smith else 132b2f1ab58SBarry Smith { 133b2f1ab58SBarry Smith for (i=0; i<nrows; i++) 134b2f1ab58SBarry Smith { 135b2f1ab58SBarry Smith r_nnz = result->row_nnz[i]; 136c328eeadSBarry Smith ierr = PetscMalloc(r_nnz*sizeof(PetscInt), &result->icols[i]);CHKERRQ(ierr); 137b2f1ab58SBarry Smith } 138b2f1ab58SBarry Smith if (do_values) 139b2f1ab58SBarry Smith { 140b2f1ab58SBarry Smith for (i=0; i<nrows; i++) 141b2f1ab58SBarry Smith { 142b2f1ab58SBarry Smith r_nnz = result->row_nnz[i]; 143b2f1ab58SBarry Smith ierr = PetscMalloc(r_nnz*sizeof(PetscScalar), 144b2f1ab58SBarry Smith &result->values[i]);CHKERRQ(ierr); 145b2f1ab58SBarry Smith } 146b2f1ab58SBarry Smith } 147b2f1ab58SBarry Smith } 148b2f1ab58SBarry Smith 149b2f1ab58SBarry Smith PetscFunctionReturn(0); 150b2f1ab58SBarry Smith } 151b2f1ab58SBarry Smith 152b2f1ab58SBarry Smith /* 153b2f1ab58SBarry Smith spbas_row_order_icol 154b2f1ab58SBarry Smith determine if row i1 should come 155b2f1ab58SBarry Smith + before row i2 in the sorted rows (return -1), 156b2f1ab58SBarry Smith + after (return 1) 157b2f1ab58SBarry Smith + is identical (return 0). 158b2f1ab58SBarry Smith */ 159b2f1ab58SBarry Smith #undef __FUNCT__ 160b2f1ab58SBarry Smith #define __FUNCT__ "spbas_row_order_icol" 161b2f1ab58SBarry Smith int spbas_row_order_icol(PetscInt i1, PetscInt i2, PetscInt *irow_in, PetscInt *icol_in,PetscInt col_idx_type) 162b2f1ab58SBarry Smith { 163b2f1ab58SBarry Smith PetscInt j; 164b2f1ab58SBarry Smith PetscInt nnz1 = irow_in[i1+1] - irow_in[i1]; 165b2f1ab58SBarry Smith PetscInt nnz2 = irow_in[i2+1] - irow_in[i2]; 166b2f1ab58SBarry Smith 167b2f1ab58SBarry Smith PetscInt * icol1 = &icol_in[irow_in[i1]]; 168b2f1ab58SBarry Smith PetscInt * icol2 = &icol_in[irow_in[i2]]; 169b2f1ab58SBarry Smith 170b2f1ab58SBarry Smith if (nnz1<nnz2) {return -1;} 171b2f1ab58SBarry Smith if (nnz1>nnz2) {return 1;} 172b2f1ab58SBarry Smith 173b2f1ab58SBarry Smith if (col_idx_type == SPBAS_COLUMN_NUMBERS) 174b2f1ab58SBarry Smith { 175b2f1ab58SBarry Smith for (j=0; j<nnz1; j++) 176b2f1ab58SBarry Smith { 177b2f1ab58SBarry Smith if (icol1[j]< icol2[j]) {return -1;} 178b2f1ab58SBarry Smith if (icol1[j]> icol2[j]) {return 1;} 179b2f1ab58SBarry Smith } 180b2f1ab58SBarry Smith } 181b2f1ab58SBarry Smith else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) 182b2f1ab58SBarry Smith { 183b2f1ab58SBarry Smith for (j=0; j<nnz1; j++) 184b2f1ab58SBarry Smith { 185b2f1ab58SBarry Smith if (icol1[j]-i1< icol2[j]-i2) {return -1;} 186b2f1ab58SBarry Smith if (icol1[j]-i1> icol2[j]-i2) {return 1;} 187b2f1ab58SBarry Smith } 188b2f1ab58SBarry Smith } 189b2f1ab58SBarry Smith else if (col_idx_type == SPBAS_OFFSET_ARRAY) 190b2f1ab58SBarry Smith { 191b2f1ab58SBarry Smith for (j=1; j<nnz1; j++) 192b2f1ab58SBarry Smith { 193b2f1ab58SBarry Smith if (icol1[j]-icol1[0] < icol2[j]-icol2[0]) {return -1;} 194b2f1ab58SBarry Smith if (icol1[j]-icol1[0] > icol2[j]-icol2[0]) {return 1;} 195b2f1ab58SBarry Smith } 196b2f1ab58SBarry Smith } 197b2f1ab58SBarry Smith return 0; 198b2f1ab58SBarry Smith } 199b2f1ab58SBarry Smith 200b2f1ab58SBarry Smith /* 201b2f1ab58SBarry Smith spbas_mergesort_icols: 202b2f1ab58SBarry Smith return a sorting of the rows in which identical sparseness patterns are 203b2f1ab58SBarry Smith next to each other 204b2f1ab58SBarry Smith */ 205b2f1ab58SBarry Smith #undef __FUNCT__ 206b2f1ab58SBarry Smith #define __FUNCT__ "spbas_mergesort_icols" 207b2f1ab58SBarry Smith PetscErrorCode spbas_mergesort_icols( PetscInt nrows, PetscInt * irow_in, PetscInt * icol_in,PetscInt col_idx_type, PetscInt *isort) 208b2f1ab58SBarry Smith { 209b2f1ab58SBarry Smith PetscErrorCode ierr; 210c328eeadSBarry Smith PetscInt istep; /* Chunk-sizes of already sorted parts of arrays */ 211b2f1ab58SBarry Smith 212c328eeadSBarry Smith PetscInt i, i1, i2; /* Loop counters for (partly) sorted arrays */ 213b2f1ab58SBarry Smith 214c328eeadSBarry Smith PetscInt istart, i1end, i2end; /* start of newly sorted array part, end of both parts */ 215b2f1ab58SBarry Smith 216c328eeadSBarry Smith PetscInt *ialloc; /* Allocated arrays */ 217c328eeadSBarry Smith PetscInt *iswap; /* auxiliary pointers for swapping */ 218c328eeadSBarry Smith PetscInt *ihlp1; /* Pointers to new version of arrays, */ 219c328eeadSBarry Smith PetscInt *ihlp2; /* Pointers to previous version of arrays, */ 220b2f1ab58SBarry Smith 221b2f1ab58SBarry Smith PetscFunctionBegin; 222b2f1ab58SBarry Smith 223b2f1ab58SBarry Smith ierr = PetscMalloc(nrows*sizeof(PetscInt),&ialloc);CHKERRQ(ierr); 224b2f1ab58SBarry Smith 225b2f1ab58SBarry Smith ihlp1 = ialloc; 226b2f1ab58SBarry Smith ihlp2 = isort; 227b2f1ab58SBarry Smith 228c328eeadSBarry Smith /* Sorted array chunks are first 1 long, and increase until they are the complete array */ 229b2f1ab58SBarry Smith for (istep=1; istep<nrows; istep*=2) 230b2f1ab58SBarry Smith { 231c328eeadSBarry Smith /* 232c328eeadSBarry Smith Combine sorted parts 233c328eeadSBarry Smith istart:istart+istep-1 and istart+istep-1:istart+2*istep-1 234c328eeadSBarry Smith of ihlp2 and vhlp2 235c328eeadSBarry Smith 236c328eeadSBarry Smith into one sorted part 237c328eeadSBarry Smith istart:istart+2*istep-1 238c328eeadSBarry Smith of ihlp1 and vhlp1 239c328eeadSBarry Smith */ 240b2f1ab58SBarry Smith for (istart=0; istart<nrows; istart+=2*istep) 241b2f1ab58SBarry Smith { 242b2f1ab58SBarry Smith 243c328eeadSBarry Smith /* Set counters and bound array part endings */ 244b2f1ab58SBarry Smith i1=istart; i1end = i1+istep; if (i1end>nrows) {i1end=nrows;} 245b2f1ab58SBarry Smith i2=istart+istep; i2end = i2+istep; if (i2end>nrows) {i2end=nrows;} 246b2f1ab58SBarry Smith 247c328eeadSBarry Smith /* Merge the two array parts */ 248b2f1ab58SBarry Smith for (i=istart; i<i2end; i++) 249b2f1ab58SBarry Smith { 250b2f1ab58SBarry Smith if (i1<i1end && i2<i2end && 251b2f1ab58SBarry Smith spbas_row_order_icol( ihlp2[i1], ihlp2[i2], 252b2f1ab58SBarry Smith irow_in, icol_in, col_idx_type) < 0) 253b2f1ab58SBarry Smith { 254b2f1ab58SBarry Smith ihlp1[i] = ihlp2[i1]; 255b2f1ab58SBarry Smith i1++; 256b2f1ab58SBarry Smith } 257b2f1ab58SBarry Smith else if (i2<i2end ) 258b2f1ab58SBarry Smith { 259b2f1ab58SBarry Smith ihlp1[i] = ihlp2[i2]; 260b2f1ab58SBarry Smith i2++; 261b2f1ab58SBarry Smith } 262b2f1ab58SBarry Smith else 263b2f1ab58SBarry Smith { 264b2f1ab58SBarry Smith ihlp1[i] = ihlp2[i1]; 265b2f1ab58SBarry Smith i1++; 266b2f1ab58SBarry Smith } 267b2f1ab58SBarry Smith } 268b2f1ab58SBarry Smith } 269b2f1ab58SBarry Smith 270c328eeadSBarry Smith /* Swap the two array sets */ 271b2f1ab58SBarry Smith iswap = ihlp2; ihlp2 = ihlp1; ihlp1 = iswap; 272b2f1ab58SBarry Smith } 273b2f1ab58SBarry Smith 274c328eeadSBarry Smith /* Copy one more time in case the sorted arrays are the temporary ones */ 275b2f1ab58SBarry Smith if (ihlp2 != isort) 276b2f1ab58SBarry Smith { 277b2f1ab58SBarry Smith for (i=0; i<nrows; i++) { isort[i] = ihlp2[i]; } 278b2f1ab58SBarry Smith } 279b2f1ab58SBarry Smith 280b2f1ab58SBarry Smith ierr = PetscFree(ialloc);CHKERRQ(ierr); 281b2f1ab58SBarry Smith PetscFunctionReturn(0); 282b2f1ab58SBarry Smith } 283b2f1ab58SBarry Smith 284b2f1ab58SBarry Smith 285b2f1ab58SBarry Smith 286b2f1ab58SBarry Smith /* 287b2f1ab58SBarry Smith spbas_compress_pattern: 288b2f1ab58SBarry Smith calculate a compressed sparseness pattern for a sparseness pattern 289b2f1ab58SBarry Smith given in compressed row storage. The compressed sparseness pattern may 290b2f1ab58SBarry Smith require (much) less memory. 291b2f1ab58SBarry Smith */ 292b2f1ab58SBarry Smith #undef __FUNCT__ 293b2f1ab58SBarry Smith #define __FUNCT__ "spbas_compress_pattern" 294b2f1ab58SBarry Smith PetscErrorCode spbas_compress_pattern(PetscInt *irow_in, PetscInt *icol_in, PetscInt nrows, PetscInt ncols, 2959767453cSBarry Smith PetscInt col_idx_type, spbas_matrix *B,PetscReal *mem_reduction) 296b2f1ab58SBarry Smith { 297b2f1ab58SBarry Smith PetscInt nnz = irow_in[nrows]; 298b2f1ab58SBarry Smith long int mem_orig = (nrows + nnz) * sizeof(PetscInt); 299b2f1ab58SBarry Smith long int mem_compressed; 300b2f1ab58SBarry Smith PetscErrorCode ierr; 301b2f1ab58SBarry Smith PetscInt *isort; 302b2f1ab58SBarry Smith PetscInt *icols; 303b2f1ab58SBarry Smith PetscInt row_nnz; 304b2f1ab58SBarry Smith PetscInt *ipoint; 305b2f1ab58SBarry Smith PetscTruth *used; 306b2f1ab58SBarry Smith PetscInt ptr; 307b2f1ab58SBarry Smith PetscInt i,j; 308b2f1ab58SBarry Smith 309b2f1ab58SBarry Smith const PetscTruth no_values = PETSC_FALSE; 310b2f1ab58SBarry Smith 311b2f1ab58SBarry Smith PetscFunctionBegin; 312b2f1ab58SBarry Smith 313c328eeadSBarry Smith /* Allocate the structure of the new matrix */ 314b2f1ab58SBarry Smith B->nrows = nrows; 315b2f1ab58SBarry Smith B->ncols = ncols; 316b2f1ab58SBarry Smith B->nnz = nnz; 317b2f1ab58SBarry Smith B->col_idx_type= col_idx_type; 318b2f1ab58SBarry Smith B->block_data = PETSC_TRUE; 319b2f1ab58SBarry Smith ierr = spbas_allocate_pattern( B, no_values);CHKERRQ(ierr); 320b2f1ab58SBarry Smith 321c328eeadSBarry Smith /* When using an offset array, set it */ 322b2f1ab58SBarry Smith if (col_idx_type==SPBAS_OFFSET_ARRAY) 323b2f1ab58SBarry Smith { 324b2f1ab58SBarry Smith for (i=0; i<nrows; i++) {B->icol0[i] = icol_in[irow_in[i]];} 325b2f1ab58SBarry Smith } 326b2f1ab58SBarry Smith 327c328eeadSBarry Smith /* Allocate the ordering for the rows */ 328b2f1ab58SBarry Smith ierr = PetscMalloc(nrows*sizeof(PetscInt),&isort);CHKERRQ(ierr); 329b2f1ab58SBarry Smith ierr = PetscMalloc(nrows*sizeof(PetscInt),&ipoint);CHKERRQ(ierr); 330b2f1ab58SBarry Smith ierr = PetscMalloc(nrows*sizeof(PetscTruth),&used);CHKERRQ(ierr); 331b2f1ab58SBarry Smith 332c328eeadSBarry Smith /* Initialize the sorting */ 333b2f1ab58SBarry Smith memset((void*) used, 0, nrows*sizeof(PetscTruth)); 334b2f1ab58SBarry Smith for (i = 0; i<nrows; i++) 335b2f1ab58SBarry Smith { 336b2f1ab58SBarry Smith B->row_nnz[i] = irow_in[i+1]-irow_in[i]; 337b2f1ab58SBarry Smith isort[i] = i; 338b2f1ab58SBarry Smith ipoint[i]= i; 339b2f1ab58SBarry Smith } 340b2f1ab58SBarry Smith 341c328eeadSBarry Smith /* Sort the rows so that identical columns will be next to each other */ 342b2f1ab58SBarry Smith ierr = spbas_mergesort_icols( nrows, irow_in, icol_in, col_idx_type, isort);CHKERRQ(ierr); 34371d55d18SBarry Smith ierr = PetscInfo(PETSC_NULL,"Rows have been sorted for patterns\n");CHKERRQ(ierr); 344b2f1ab58SBarry Smith 345c328eeadSBarry Smith /* Replace identical rows with the first one in the list */ 346b2f1ab58SBarry Smith for (i=1; i<nrows; i++) 347b2f1ab58SBarry Smith { 348b2f1ab58SBarry Smith if (spbas_row_order_icol(isort[i-1], isort[i], 349b2f1ab58SBarry Smith irow_in, icol_in, col_idx_type) == 0) 350b2f1ab58SBarry Smith { 351b2f1ab58SBarry Smith ipoint[isort[i]] = ipoint[isort[i-1]]; 352b2f1ab58SBarry Smith } 353b2f1ab58SBarry Smith } 354b2f1ab58SBarry Smith 355c328eeadSBarry Smith /* Collect the rows which are used*/ 356b2f1ab58SBarry Smith for (i=0; i<nrows; i++) {used[ipoint[i]] = PETSC_TRUE;} 357b2f1ab58SBarry Smith 358c328eeadSBarry Smith /* Calculate needed memory */ 359b2f1ab58SBarry Smith B->n_alloc_icol = 0; 360b2f1ab58SBarry Smith for (i=0; i<nrows; i++) 361b2f1ab58SBarry Smith { 362b2f1ab58SBarry Smith if (used[i]) {B->n_alloc_icol += B->row_nnz[i];} 363b2f1ab58SBarry Smith } 364b2f1ab58SBarry Smith 36571d55d18SBarry Smith ierr = PetscMalloc(B->n_alloc_icol*sizeof(PetscInt),&B->alloc_icol);CHKERRQ(ierr); 366b2f1ab58SBarry Smith 367c328eeadSBarry Smith /* Fill in the diagonal offsets for the rows which store their own data */ 368b2f1ab58SBarry Smith ptr = 0; 369b2f1ab58SBarry Smith for (i=0; i<B->nrows; i++) 370b2f1ab58SBarry Smith { 371b2f1ab58SBarry Smith if (used[i]) 372b2f1ab58SBarry Smith { 373b2f1ab58SBarry Smith B->icols[i] = &B->alloc_icol[ptr]; 374b2f1ab58SBarry Smith icols = &icol_in[irow_in[i]]; 375b2f1ab58SBarry Smith row_nnz = B->row_nnz[i]; 376b2f1ab58SBarry Smith if (col_idx_type == SPBAS_COLUMN_NUMBERS) 377b2f1ab58SBarry Smith { 378b2f1ab58SBarry Smith for (j=0; j<row_nnz; j++) 379b2f1ab58SBarry Smith { 380b2f1ab58SBarry Smith B->icols[i][j] = icols[j]; 381b2f1ab58SBarry Smith } 382b2f1ab58SBarry Smith } 383b2f1ab58SBarry Smith else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) 384b2f1ab58SBarry Smith { 385b2f1ab58SBarry Smith for (j=0; j<row_nnz; j++) 386b2f1ab58SBarry Smith { 387b2f1ab58SBarry Smith B->icols[i][j] = icols[j]-i; 388b2f1ab58SBarry Smith } 389b2f1ab58SBarry Smith } 390b2f1ab58SBarry Smith else if (col_idx_type == SPBAS_OFFSET_ARRAY) 391b2f1ab58SBarry Smith { 392b2f1ab58SBarry Smith for (j=0; j<row_nnz; j++) 393b2f1ab58SBarry Smith { 394b2f1ab58SBarry Smith B->icols[i][j] = icols[j]-icols[0]; 395b2f1ab58SBarry Smith } 396b2f1ab58SBarry Smith } 397b2f1ab58SBarry Smith ptr += B->row_nnz[i]; 398b2f1ab58SBarry Smith } 399b2f1ab58SBarry Smith } 400b2f1ab58SBarry Smith 401c328eeadSBarry Smith /* Point to the right places for all data */ 402b2f1ab58SBarry Smith for (i=0; i<nrows; i++) 403b2f1ab58SBarry Smith { 404b2f1ab58SBarry Smith B->icols[i] = B->icols[ipoint[i]]; 405b2f1ab58SBarry Smith } 406c328eeadSBarry Smith ierr = PetscInfo(PETSC_NULL,"Row patterns have been compressed\n");CHKERRQ(ierr); 407c328eeadSBarry Smith ierr = PetscInfo1(PETSC_NULL," (%G nonzeros per row)\n", (PetscReal) nnz / (PetscReal) nrows);CHKERRQ(ierr); 408b2f1ab58SBarry Smith 409b2f1ab58SBarry Smith ierr=PetscFree(isort);CHKERRQ(ierr); 410b2f1ab58SBarry Smith ierr=PetscFree(used);CHKERRQ(ierr); 411b2f1ab58SBarry Smith ierr=PetscFree(ipoint);CHKERRQ(ierr); 412b2f1ab58SBarry Smith 413b2f1ab58SBarry Smith mem_compressed = spbas_memory_requirement( *B ); 4149767453cSBarry Smith *mem_reduction = 100.0 * (PetscReal)(mem_orig-mem_compressed)/ 4159767453cSBarry Smith (PetscReal) mem_orig; 416b2f1ab58SBarry Smith PetscFunctionReturn(0); 417b2f1ab58SBarry Smith } 418b2f1ab58SBarry Smith 419b2f1ab58SBarry Smith /* 420b2f1ab58SBarry Smith spbas_incomplete_cholesky 421b2f1ab58SBarry Smith Incomplete Cholesky decomposition 422b2f1ab58SBarry Smith */ 423b2f1ab58SBarry Smith #include "spbas_cholesky.h" 424b2f1ab58SBarry Smith 425b2f1ab58SBarry Smith /* 426b2f1ab58SBarry Smith spbas_delete : de-allocate the arrays owned by this matrix 427b2f1ab58SBarry Smith */ 428b2f1ab58SBarry Smith #undef __FUNCT__ 429b2f1ab58SBarry Smith #define __FUNCT__ "spbas_delete" 430b2f1ab58SBarry Smith PetscErrorCode spbas_delete(spbas_matrix matrix) 431b2f1ab58SBarry Smith { 432b2f1ab58SBarry Smith PetscInt i; 433b2f1ab58SBarry Smith PetscErrorCode ierr; 4349ccc4a9bSBarry Smith 435b2f1ab58SBarry Smith PetscFunctionBegin; 4369ccc4a9bSBarry Smith if (matrix.block_data) { 437b2f1ab58SBarry Smith ierr=PetscFree(matrix.alloc_icol);CHKERRQ(ierr) 438b2f1ab58SBarry Smith if (matrix.values){ierr=PetscFree(matrix.alloc_val);CHKERRQ(ierr);} 4399ccc4a9bSBarry Smith } else { 4409ccc4a9bSBarry Smith for (i=0; i<matrix.nrows; i++) { ierr=PetscFree(matrix.icols[i]);CHKERRQ(ierr);} 441b2f1ab58SBarry Smith ierr = PetscFree(matrix.icols);CHKERRQ(ierr); 4429ccc4a9bSBarry Smith if (matrix.values) { 4439ccc4a9bSBarry Smith for (i=0; i<matrix.nrows; i++) { ierr=PetscFree(matrix.values[i]);CHKERRQ(ierr);} 444b2f1ab58SBarry Smith } 445b2f1ab58SBarry Smith } 446b2f1ab58SBarry Smith 447b2f1ab58SBarry Smith ierr=PetscFree(matrix.row_nnz);CHKERRQ(ierr); 448b2f1ab58SBarry Smith ierr=PetscFree(matrix.icols);CHKERRQ(ierr); 4499ccc4a9bSBarry Smith if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) {ierr=PetscFree(matrix.icol0);CHKERRQ(ierr);} 4509ccc4a9bSBarry Smith if (matrix.values) {ierr=PetscFree(matrix.values);CHKERRQ(ierr);} 451b2f1ab58SBarry Smith PetscFunctionReturn(0); 452b2f1ab58SBarry Smith } 453b2f1ab58SBarry Smith 454b2f1ab58SBarry Smith /* 455b2f1ab58SBarry Smith spbas_matrix_to_crs: 456b2f1ab58SBarry Smith Convert an spbas_matrix to compessed row storage 457b2f1ab58SBarry Smith */ 458b2f1ab58SBarry Smith #undef __FUNCT__ 459b2f1ab58SBarry Smith #define __FUNCT__ "spbas_matrix_to_crs" 460b2f1ab58SBarry Smith PetscErrorCode spbas_matrix_to_crs(spbas_matrix matrix_A,MatScalar **val_out, PetscInt **irow_out, PetscInt **icol_out) 461b2f1ab58SBarry Smith { 462b2f1ab58SBarry Smith PetscInt nrows = matrix_A.nrows; 463b2f1ab58SBarry Smith PetscInt nnz = matrix_A.nnz; 464b2f1ab58SBarry Smith PetscInt i,j,r_nnz,i0; 465b2f1ab58SBarry Smith PetscInt *irow; 466b2f1ab58SBarry Smith PetscInt *icol; 467b2f1ab58SBarry Smith PetscInt *icol_A; 468b2f1ab58SBarry Smith MatScalar *val; 469b2f1ab58SBarry Smith PetscScalar *val_A; 470b2f1ab58SBarry Smith PetscInt col_idx_type = matrix_A.col_idx_type; 471d36aa34eSBarry Smith PetscTruth do_values = matrix_A.values ? PETSC_TRUE : PETSC_FALSE; 472b2f1ab58SBarry Smith PetscErrorCode ierr; 473b2f1ab58SBarry Smith 474b2f1ab58SBarry Smith PetscFunctionBegin; 475b2f1ab58SBarry Smith ierr = PetscMalloc( sizeof(PetscInt) * (nrows+1), &irow);CHKERRQ(ierr); 476b2f1ab58SBarry Smith ierr = PetscMalloc( sizeof(PetscInt) * nnz, &icol);CHKERRQ(ierr); 4779ccc4a9bSBarry Smith *icol_out = icol; 4789ccc4a9bSBarry Smith *irow_out=irow; 4799ccc4a9bSBarry Smith if (do_values) { 480b2f1ab58SBarry Smith ierr = PetscMalloc( sizeof(MatScalar) * nnz, &val);CHKERRQ(ierr); 481b2f1ab58SBarry Smith *val_out = val; *icol_out = icol; *irow_out=irow; 482b2f1ab58SBarry Smith } 483b2f1ab58SBarry Smith 484b2f1ab58SBarry Smith irow[0]=0; 4859ccc4a9bSBarry Smith for (i=0; i<nrows; i++) { 486b2f1ab58SBarry Smith r_nnz = matrix_A.row_nnz[i]; 487b2f1ab58SBarry Smith i0 = irow[i]; 488b2f1ab58SBarry Smith irow[i+1] = i0 + r_nnz; 489b2f1ab58SBarry Smith icol_A = matrix_A.icols[i]; 490b2f1ab58SBarry Smith 4919ccc4a9bSBarry Smith if (do_values) { 492b2f1ab58SBarry Smith val_A = matrix_A.values[i]; 4939ccc4a9bSBarry Smith for (j=0; j<r_nnz; j++) { 494b2f1ab58SBarry Smith icol[i0+j] = icol_A[j]; 495b2f1ab58SBarry Smith val[i0+j] = val_A[j]; 496b2f1ab58SBarry Smith } 4979ccc4a9bSBarry Smith } else { 498b2f1ab58SBarry Smith for (j=0; j<r_nnz; j++) { icol[i0+j] = icol_A[j]; } 499b2f1ab58SBarry Smith } 500b2f1ab58SBarry Smith 5019ccc4a9bSBarry Smith if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) { 502b2f1ab58SBarry Smith for (j=0; j<r_nnz; j++) { icol[i0+j] += i; } 503b2f1ab58SBarry Smith } 5049ccc4a9bSBarry Smith else if (col_idx_type == SPBAS_OFFSET_ARRAY) { 505b2f1ab58SBarry Smith i0 = matrix_A.icol0[i]; 506b2f1ab58SBarry Smith for (j=0; j<r_nnz; j++) { icol[i0+j] += i0; } 507b2f1ab58SBarry Smith } 508b2f1ab58SBarry Smith } 509b2f1ab58SBarry Smith PetscFunctionReturn(0); 510b2f1ab58SBarry Smith } 511b2f1ab58SBarry Smith 512b2f1ab58SBarry Smith 513b2f1ab58SBarry Smith /* 514b2f1ab58SBarry Smith spbas_transpose 515b2f1ab58SBarry Smith return the transpose of a matrix 516b2f1ab58SBarry Smith */ 517b2f1ab58SBarry Smith #undef __FUNCT__ 518b2f1ab58SBarry Smith #define __FUNCT__ "spbas_transpose" 519b2f1ab58SBarry Smith PetscErrorCode spbas_transpose( spbas_matrix in_matrix, spbas_matrix * result) 520b2f1ab58SBarry Smith { 521b2f1ab58SBarry Smith PetscInt col_idx_type = in_matrix.col_idx_type; 522b2f1ab58SBarry Smith PetscInt nnz = in_matrix.nnz; 523b2f1ab58SBarry Smith PetscInt ncols = in_matrix.nrows; 524b2f1ab58SBarry Smith PetscInt nrows = in_matrix.ncols; 525b2f1ab58SBarry Smith PetscInt i,j,k; 526b2f1ab58SBarry Smith PetscInt r_nnz; 527b2f1ab58SBarry Smith PetscInt *irow; 528b2f1ab58SBarry Smith PetscInt icol0; 529b2f1ab58SBarry Smith PetscScalar * val; 530b2f1ab58SBarry Smith PetscErrorCode ierr; 531b2f1ab58SBarry Smith PetscFunctionBegin; 532b2f1ab58SBarry Smith 533c328eeadSBarry Smith /* Copy input values */ 534b2f1ab58SBarry Smith result->nrows = nrows; 535b2f1ab58SBarry Smith result->ncols = ncols; 536b2f1ab58SBarry Smith result->nnz = nnz; 537b2f1ab58SBarry Smith result->col_idx_type = SPBAS_COLUMN_NUMBERS; 538b2f1ab58SBarry Smith result->block_data = PETSC_TRUE; 539b2f1ab58SBarry Smith 540c328eeadSBarry Smith /* Allocate sparseness pattern */ 54171d55d18SBarry Smith ierr = spbas_allocate_pattern(result, in_matrix.values ? PETSC_TRUE : PETSC_FALSE);CHKERRQ(ierr); 542b2f1ab58SBarry Smith 543c328eeadSBarry Smith /* Count the number of nonzeros in each row */ 544b2f1ab58SBarry Smith for (i = 0; i<nrows; i++) { result->row_nnz[i] = 0; } 545b2f1ab58SBarry Smith 5469ccc4a9bSBarry Smith for (i=0; i<ncols; i++) { 547b2f1ab58SBarry Smith r_nnz = in_matrix.row_nnz[i]; 548b2f1ab58SBarry Smith irow = in_matrix.icols[i]; 5499ccc4a9bSBarry Smith if (col_idx_type == SPBAS_COLUMN_NUMBERS) { 550b2f1ab58SBarry Smith for (j=0; j<r_nnz; j++) { result->row_nnz[irow[j]]++; } 5519ccc4a9bSBarry Smith } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) { 552b2f1ab58SBarry Smith for (j=0; j<r_nnz; j++) { result->row_nnz[i+irow[j]]++; } 5539ccc4a9bSBarry Smith } else if (col_idx_type == SPBAS_OFFSET_ARRAY) { 554b2f1ab58SBarry Smith icol0=in_matrix.icol0[i]; 555b2f1ab58SBarry Smith for (j=0; j<r_nnz; j++) { result->row_nnz[icol0+irow[j]]++; } 556b2f1ab58SBarry Smith } 557b2f1ab58SBarry Smith } 558b2f1ab58SBarry Smith 559c328eeadSBarry Smith /* Set the pointers to the data */ 560b2f1ab58SBarry Smith ierr = spbas_allocate_data(result);CHKERRQ(ierr); 561b2f1ab58SBarry Smith 562c328eeadSBarry Smith /* Reset the number of nonzeros in each row */ 563b2f1ab58SBarry Smith for (i = 0; i<nrows; i++) { result->row_nnz[i] = 0; } 564b2f1ab58SBarry Smith 565c328eeadSBarry Smith /* Fill the data arrays */ 5669ccc4a9bSBarry Smith if (in_matrix.values) { 5679ccc4a9bSBarry Smith for (i=0; i<ncols; i++) { 568b2f1ab58SBarry Smith r_nnz = in_matrix.row_nnz[i]; 569b2f1ab58SBarry Smith irow = in_matrix.icols[i]; 570b2f1ab58SBarry Smith val = in_matrix.values[i]; 571b2f1ab58SBarry Smith 572b2f1ab58SBarry Smith if (col_idx_type == SPBAS_COLUMN_NUMBERS) {icol0=0;} 573b2f1ab58SBarry Smith else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {icol0=i;} 574b2f1ab58SBarry Smith else if (col_idx_type == SPBAS_OFFSET_ARRAY) 575b2f1ab58SBarry Smith {icol0=in_matrix.icol0[i];} 5769ccc4a9bSBarry Smith for (j=0; j<r_nnz; j++) { 577b2f1ab58SBarry Smith k = icol0 + irow[j]; 578b2f1ab58SBarry Smith result->icols[k][result->row_nnz[k]] = i; 579b2f1ab58SBarry Smith result->values[k][result->row_nnz[k]] = val[j]; 580b2f1ab58SBarry Smith result->row_nnz[k]++; 581b2f1ab58SBarry Smith } 582b2f1ab58SBarry Smith } 5839ccc4a9bSBarry Smith } else { 5849ccc4a9bSBarry Smith for (i=0; i<ncols; i++) { 585b2f1ab58SBarry Smith r_nnz = in_matrix.row_nnz[i]; 586b2f1ab58SBarry Smith irow = in_matrix.icols[i]; 587b2f1ab58SBarry Smith 588b2f1ab58SBarry Smith if (col_idx_type == SPBAS_COLUMN_NUMBERS) {icol0=0;} 589b2f1ab58SBarry Smith else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {icol0=i;} 5909ccc4a9bSBarry Smith else if (col_idx_type == SPBAS_OFFSET_ARRAY) {icol0=in_matrix.icol0[i];} 591b2f1ab58SBarry Smith 5929ccc4a9bSBarry Smith for (j=0; j<r_nnz; j++) { 593b2f1ab58SBarry Smith k = icol0 + irow[j]; 594b2f1ab58SBarry Smith result->icols[k][result->row_nnz[k]] = i; 595b2f1ab58SBarry Smith result->row_nnz[k]++; 596b2f1ab58SBarry Smith } 597b2f1ab58SBarry Smith } 598b2f1ab58SBarry Smith } 599b2f1ab58SBarry Smith PetscFunctionReturn(0); 600b2f1ab58SBarry Smith } 601b2f1ab58SBarry Smith 602b2f1ab58SBarry Smith /* 603b2f1ab58SBarry Smith spbas_mergesort 604b2f1ab58SBarry Smith 605b2f1ab58SBarry Smith mergesort for an array of intergers and an array of associated 606b2f1ab58SBarry Smith reals 607b2f1ab58SBarry Smith 608b2f1ab58SBarry Smith on output, icol[0..nnz-1] is increasing; 609b2f1ab58SBarry Smith val[0..nnz-1] has undergone the same permutation as icol 610b2f1ab58SBarry Smith 611c328eeadSBarry Smith NB: val may be PETSC_NULL: in that case, only the integers are sorted 612b2f1ab58SBarry Smith 613b2f1ab58SBarry Smith */ 614b2f1ab58SBarry Smith #undef __FUNCT__ 615b2f1ab58SBarry Smith #define __FUNCT__ "spbas_mergesort" 616b2f1ab58SBarry Smith PetscErrorCode spbas_mergesort(PetscInt nnz, PetscInt *icol, PetscScalar *val) 617b2f1ab58SBarry Smith { 618c328eeadSBarry Smith PetscInt istep; /* Chunk-sizes of already sorted parts of arrays */ 619c328eeadSBarry Smith PetscInt i, i1, i2; /* Loop counters for (partly) sorted arrays */ 620c328eeadSBarry Smith PetscInt istart, i1end, i2end; /* start of newly sorted array part, end of both parts */ 621c328eeadSBarry Smith PetscInt *ialloc; /* Allocated arrays */ 622c328eeadSBarry Smith PetscScalar *valloc=PETSC_NULL; 623c328eeadSBarry Smith PetscInt *iswap; /* auxiliary pointers for swapping */ 624b2f1ab58SBarry Smith PetscScalar *vswap; 625c328eeadSBarry Smith PetscInt *ihlp1; /* Pointers to new version of arrays, */ 626c328eeadSBarry Smith PetscScalar *vhlp1=PETSC_NULL; /* (arrays under construction) */ 627c328eeadSBarry Smith PetscInt *ihlp2; /* Pointers to previous version of arrays, */ 628c328eeadSBarry Smith PetscScalar *vhlp2=PETSC_NULL; 629b2f1ab58SBarry Smith PetscErrorCode ierr; 630b2f1ab58SBarry Smith 631b2f1ab58SBarry Smith ierr = PetscMalloc(nnz*sizeof(PetscInt),&ialloc);CHKERRQ(ierr); 632b2f1ab58SBarry Smith ihlp1 = ialloc; 633b2f1ab58SBarry Smith ihlp2 = icol; 634b2f1ab58SBarry Smith 6359ccc4a9bSBarry Smith if (val) { 636b2f1ab58SBarry Smith ierr = PetscMalloc(nnz*sizeof(PetscScalar),&valloc);CHKERRQ(ierr); 637b2f1ab58SBarry Smith vhlp1 = valloc; 638b2f1ab58SBarry Smith vhlp2 = val; 639b2f1ab58SBarry Smith } 640b2f1ab58SBarry Smith 641b2f1ab58SBarry Smith 642c328eeadSBarry Smith /* Sorted array chunks are first 1 long, and increase until they are the complete array */ 6439ccc4a9bSBarry Smith for (istep=1; istep<nnz; istep*=2) { 644c328eeadSBarry Smith /* 645c328eeadSBarry Smith Combine sorted parts 646c328eeadSBarry Smith istart:istart+istep-1 and istart+istep-1:istart+2*istep-1 647c328eeadSBarry Smith of ihlp2 and vhlp2 648c328eeadSBarry Smith 649c328eeadSBarry Smith into one sorted part 650c328eeadSBarry Smith istart:istart+2*istep-1 651c328eeadSBarry Smith of ihlp1 and vhlp1 652c328eeadSBarry Smith */ 6539ccc4a9bSBarry Smith for (istart=0; istart<nnz; istart+=2*istep) { 654b2f1ab58SBarry Smith 655c328eeadSBarry Smith /* Set counters and bound array part endings */ 656b2f1ab58SBarry Smith i1=istart; i1end = i1+istep; if (i1end>nnz) {i1end=nnz;} 657b2f1ab58SBarry Smith i2=istart+istep; i2end = i2+istep; if (i2end>nnz) {i2end=nnz;} 658b2f1ab58SBarry Smith 659c328eeadSBarry Smith /* Merge the two array parts */ 6609ccc4a9bSBarry Smith if (val) { 6619ccc4a9bSBarry Smith for (i=istart; i<i2end; i++) { 6629ccc4a9bSBarry Smith if (i1<i1end && i2<i2end && ihlp2[i1] < ihlp2[i2]) { 663b2f1ab58SBarry Smith ihlp1[i] = ihlp2[i1]; 664b2f1ab58SBarry Smith vhlp1[i] = vhlp2[i1]; 665b2f1ab58SBarry Smith i1++; 6669ccc4a9bSBarry Smith } else if (i2<i2end ){ 667b2f1ab58SBarry Smith ihlp1[i] = ihlp2[i2]; 668b2f1ab58SBarry Smith vhlp1[i] = vhlp2[i2]; 669b2f1ab58SBarry Smith i2++; 6709ccc4a9bSBarry Smith } else { 671b2f1ab58SBarry Smith ihlp1[i] = ihlp2[i1]; 672b2f1ab58SBarry Smith vhlp1[i] = vhlp2[i1]; 673b2f1ab58SBarry Smith i1++; 674b2f1ab58SBarry Smith } 675b2f1ab58SBarry Smith } 6769ccc4a9bSBarry Smith } else { 677*6322f4bdSBarry Smith for (i=istart; i<i2end; i++) { 678*6322f4bdSBarry Smith if (i1<i1end && i2<i2end && ihlp2[i1] < ihlp2[i2]) { 679b2f1ab58SBarry Smith ihlp1[i] = ihlp2[i1]; 680b2f1ab58SBarry Smith i1++; 681*6322f4bdSBarry Smith } else if (i2<i2end ) { 682b2f1ab58SBarry Smith ihlp1[i] = ihlp2[i2]; 683b2f1ab58SBarry Smith i2++; 684*6322f4bdSBarry Smith } else { 685b2f1ab58SBarry Smith ihlp1[i] = ihlp2[i1]; 686b2f1ab58SBarry Smith i1++; 687b2f1ab58SBarry Smith } 688b2f1ab58SBarry Smith } 689b2f1ab58SBarry Smith } 690b2f1ab58SBarry Smith } 691b2f1ab58SBarry Smith 692c328eeadSBarry Smith /* Swap the two array sets */ 693b2f1ab58SBarry Smith iswap = ihlp2; ihlp2 = ihlp1; ihlp1 = iswap; 694b2f1ab58SBarry Smith vswap = vhlp2; vhlp2 = vhlp1; vhlp1 = vswap; 695b2f1ab58SBarry Smith } 696b2f1ab58SBarry Smith 697c328eeadSBarry Smith /* Copy one more time in case the sorted arrays are the temporary ones */ 698*6322f4bdSBarry Smith if (ihlp2 != icol) { 699b2f1ab58SBarry Smith for (i=0; i<nnz; i++) { icol[i] = ihlp2[i]; } 700*6322f4bdSBarry Smith if (val) { 701b2f1ab58SBarry Smith for (i=0; i<nnz; i++) { val[i] = vhlp2[i]; } 702b2f1ab58SBarry Smith } 703b2f1ab58SBarry Smith } 704b2f1ab58SBarry Smith 705b2f1ab58SBarry Smith ierr = PetscFree(ialloc);CHKERRQ(ierr); 706b2f1ab58SBarry Smith if(val){ierr = PetscFree(valloc);CHKERRQ(ierr);} 707b2f1ab58SBarry Smith PetscFunctionReturn(0); 708b2f1ab58SBarry Smith } 709b2f1ab58SBarry Smith 710b2f1ab58SBarry Smith /* 711b2f1ab58SBarry Smith spbas_apply_reordering_rows: 712b2f1ab58SBarry Smith apply the given reordering to the rows: matrix_A = matrix_A(perm,:); 713b2f1ab58SBarry Smith */ 714b2f1ab58SBarry Smith #undef __FUNCT__ 715b2f1ab58SBarry Smith #define __FUNCT__ "spbas_apply_reordering_rows" 716b2f1ab58SBarry Smith PetscErrorCode spbas_apply_reordering_rows(spbas_matrix *matrix_A, const PetscInt *permutation) 717b2f1ab58SBarry Smith { 718b2f1ab58SBarry Smith PetscInt i,j,ip; 719b2f1ab58SBarry Smith PetscInt nrows=matrix_A->nrows; 720b2f1ab58SBarry Smith PetscInt * row_nnz; 721b2f1ab58SBarry Smith PetscInt **icols; 722d36aa34eSBarry Smith PetscTruth do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE; 723c328eeadSBarry Smith PetscScalar **vals=PETSC_NULL; 724b2f1ab58SBarry Smith PetscErrorCode ierr; 725b2f1ab58SBarry Smith 726b2f1ab58SBarry Smith PetscFunctionBegin; 727*6322f4bdSBarry Smith if (matrix_A->col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ( PETSC_ERR_SUP_SYS,"must have diagonal offsets in pattern\n"); 728b2f1ab58SBarry Smith 729*6322f4bdSBarry Smith if (do_values) { 730b2f1ab58SBarry Smith ierr = PetscMalloc( sizeof(PetscScalar*)*nrows, &vals);CHKERRQ(ierr); 731b2f1ab58SBarry Smith } 732b2f1ab58SBarry Smith ierr = PetscMalloc( sizeof(PetscInt)*nrows, &row_nnz);CHKERRQ(ierr); 733b2f1ab58SBarry Smith ierr = PetscMalloc( sizeof(PetscInt*)*nrows, &icols);CHKERRQ(ierr); 734b2f1ab58SBarry Smith 735*6322f4bdSBarry Smith for (i=0; i<nrows;i++) { 736b2f1ab58SBarry Smith ip = permutation[i]; 737b2f1ab58SBarry Smith if (do_values) {vals[i] = matrix_A->values[ip];} 738b2f1ab58SBarry Smith icols[i] = matrix_A->icols[ip]; 739b2f1ab58SBarry Smith row_nnz[i] = matrix_A->row_nnz[ip]; 740b2f1ab58SBarry Smith for (j=0; j<row_nnz[i]; j++) { icols[i][j] += ip-i; } 741b2f1ab58SBarry Smith } 742b2f1ab58SBarry Smith 743b2f1ab58SBarry Smith if (do_values){ ierr = PetscFree(matrix_A->values);CHKERRQ(ierr);} 744b2f1ab58SBarry Smith ierr = PetscFree(matrix_A->icols);CHKERRQ(ierr); 745b2f1ab58SBarry Smith ierr = PetscFree(matrix_A->row_nnz);CHKERRQ(ierr); 746b2f1ab58SBarry Smith 747b2f1ab58SBarry Smith if (do_values) { matrix_A->values = vals; } 748b2f1ab58SBarry Smith matrix_A->icols = icols; 749b2f1ab58SBarry Smith matrix_A->row_nnz = row_nnz; 750b2f1ab58SBarry Smith 751b2f1ab58SBarry Smith PetscFunctionReturn(0); 752b2f1ab58SBarry Smith } 753b2f1ab58SBarry Smith 754b2f1ab58SBarry Smith 755b2f1ab58SBarry Smith /* 756b2f1ab58SBarry Smith spbas_apply_reordering_cols: 757b2f1ab58SBarry Smith apply the given reordering to the columns: matrix_A(:,perm) = matrix_A; 758b2f1ab58SBarry Smith */ 759b2f1ab58SBarry Smith #undef __FUNCT__ 760b2f1ab58SBarry Smith #define __FUNCT__ "spbas_apply_reordering_cols" 761b2f1ab58SBarry Smith PetscErrorCode spbas_apply_reordering_cols( spbas_matrix *matrix_A,const PetscInt *permutation) 762b2f1ab58SBarry Smith { 763b2f1ab58SBarry Smith PetscInt i,j; 764b2f1ab58SBarry Smith PetscInt nrows=matrix_A->nrows; 765b2f1ab58SBarry Smith PetscInt row_nnz; 766b2f1ab58SBarry Smith PetscInt *icols; 767d36aa34eSBarry Smith PetscTruth do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE; 768c328eeadSBarry Smith PetscScalar *vals=PETSC_NULL; 769b2f1ab58SBarry Smith PetscErrorCode ierr; 770b2f1ab58SBarry Smith 771b2f1ab58SBarry Smith PetscFunctionBegin; 772*6322f4bdSBarry Smith if (matrix_A->col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ( PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern\n"); 773b2f1ab58SBarry Smith 774*6322f4bdSBarry Smith for (i=0; i<nrows;i++) { 775b2f1ab58SBarry Smith icols = matrix_A->icols[i]; 776b2f1ab58SBarry Smith row_nnz = matrix_A->row_nnz[i]; 777b2f1ab58SBarry Smith if (do_values) { vals = matrix_A->values[i]; } 778b2f1ab58SBarry Smith 779*6322f4bdSBarry Smith for (j=0; j<row_nnz; j++) { 780b2f1ab58SBarry Smith icols[j] = permutation[i+icols[j]]-i; 781b2f1ab58SBarry Smith } 782b2f1ab58SBarry Smith ierr = spbas_mergesort(row_nnz, icols, vals);CHKERRQ(ierr); 783b2f1ab58SBarry Smith } 784b2f1ab58SBarry Smith PetscFunctionReturn(0); 785b2f1ab58SBarry Smith } 786b2f1ab58SBarry Smith 787b2f1ab58SBarry Smith /* 788b2f1ab58SBarry Smith spbas_apply_reordering: 789b2f1ab58SBarry Smith apply the given reordering: matrix_A(perm,perm) = matrix_A; 790b2f1ab58SBarry Smith */ 791b2f1ab58SBarry Smith #undef __FUNCT__ 792b2f1ab58SBarry Smith #define __FUNCT__ "spbas_apply_reordering" 793b2f1ab58SBarry Smith PetscErrorCode spbas_apply_reordering( spbas_matrix *matrix_A, const PetscInt *permutation, const PetscInt * inv_perm) 794b2f1ab58SBarry Smith { 795b2f1ab58SBarry Smith PetscErrorCode ierr; 796b2f1ab58SBarry Smith PetscFunctionBegin; 797b2f1ab58SBarry Smith ierr = spbas_apply_reordering_rows( matrix_A, inv_perm);CHKERRQ(ierr); 798b2f1ab58SBarry Smith ierr = spbas_apply_reordering_cols( matrix_A, permutation);CHKERRQ(ierr); 799b2f1ab58SBarry Smith PetscFunctionReturn(0); 800b2f1ab58SBarry Smith } 801b2f1ab58SBarry Smith 802b2f1ab58SBarry Smith #undef __FUNCT__ 803b2f1ab58SBarry Smith #define __FUNCT__ "spbas_pattern_only" 804b2f1ab58SBarry Smith PetscErrorCode spbas_pattern_only(PetscInt nrows, PetscInt ncols, PetscInt *ai, PetscInt *aj, spbas_matrix * result) 805b2f1ab58SBarry Smith { 806b2f1ab58SBarry Smith spbas_matrix retval; 807b2f1ab58SBarry Smith PetscInt i, j, i0, r_nnz; 808b2f1ab58SBarry Smith PetscErrorCode ierr; 809b2f1ab58SBarry Smith 810b2f1ab58SBarry Smith PetscFunctionBegin; 811b2f1ab58SBarry Smith 812c328eeadSBarry Smith /* Copy input values */ 813b2f1ab58SBarry Smith retval.nrows = nrows; 814b2f1ab58SBarry Smith retval.ncols = ncols; 815b2f1ab58SBarry Smith retval.nnz = ai[nrows]; 816b2f1ab58SBarry Smith 817b2f1ab58SBarry Smith retval.block_data = PETSC_TRUE; 818b2f1ab58SBarry Smith retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS; 819b2f1ab58SBarry Smith 820c328eeadSBarry Smith /* Allocate output matrix */ 821b2f1ab58SBarry Smith ierr = spbas_allocate_pattern(&retval, PETSC_FALSE);CHKERRQ(ierr); 822b2f1ab58SBarry Smith for (i=0; i<nrows; i++) {retval.row_nnz[i] = ai[i+1]-ai[i];} 823b2f1ab58SBarry Smith ierr = spbas_allocate_data(&retval);CHKERRQ(ierr); 824c328eeadSBarry Smith /* Copy the structure */ 825*6322f4bdSBarry Smith for (i = 0; i<retval.nrows; i++) { 826b2f1ab58SBarry Smith i0 = ai[i]; 827b2f1ab58SBarry Smith r_nnz = ai[i+1]-i0; 828b2f1ab58SBarry Smith 829*6322f4bdSBarry Smith for (j=0; j<r_nnz; j++) { 830b2f1ab58SBarry Smith retval.icols[i][j] = aj[i0+j]-i; 831b2f1ab58SBarry Smith } 832b2f1ab58SBarry Smith } 833b2f1ab58SBarry Smith *result = retval; 834b2f1ab58SBarry Smith PetscFunctionReturn(0); 835b2f1ab58SBarry Smith } 836b2f1ab58SBarry Smith 837b2f1ab58SBarry Smith 838b2f1ab58SBarry Smith /* 839b2f1ab58SBarry Smith spbas_mark_row_power: 840b2f1ab58SBarry Smith Mark the columns in row 'row' which are nonzero in 841b2f1ab58SBarry Smith matrix^2log(marker). 842b2f1ab58SBarry Smith */ 843b2f1ab58SBarry Smith #undef __FUNCT__ 844b2f1ab58SBarry Smith #define __FUNCT__ "spbas_mark_row_power" 845b2f1ab58SBarry Smith PetscErrorCode spbas_mark_row_power( 846c328eeadSBarry Smith PetscInt *iwork, /* marker-vector */ 847c328eeadSBarry Smith PetscInt row, /* row for which the columns are marked */ 848c328eeadSBarry Smith spbas_matrix * in_matrix, /* matrix for which the power is being calculated */ 849c328eeadSBarry Smith PetscInt marker, /* marker-value: 2^power */ 850c328eeadSBarry Smith PetscInt minmrk, /* lower bound for marked points */ 851c328eeadSBarry Smith PetscInt maxmrk) /* upper bound for marked points */ 852b2f1ab58SBarry Smith { 853b2f1ab58SBarry Smith PetscErrorCode ierr; 854b2f1ab58SBarry Smith PetscInt i,j, nnz; 855b2f1ab58SBarry Smith 856b2f1ab58SBarry Smith PetscFunctionBegin; 857b2f1ab58SBarry Smith nnz = in_matrix->row_nnz[row]; 858b2f1ab58SBarry Smith 859c328eeadSBarry Smith /* For higher powers, call this function recursively */ 860*6322f4bdSBarry Smith if (marker>1) { 861*6322f4bdSBarry Smith for (i=0; i<nnz;i++) { 862b2f1ab58SBarry Smith j = row + in_matrix->icols[row][i]; 863*6322f4bdSBarry Smith if (minmrk<=j && j<maxmrk && iwork[j] < marker ) { 86471d55d18SBarry Smith ierr = spbas_mark_row_power( iwork, row + in_matrix->icols[row][i],in_matrix, marker/2,minmrk,maxmrk);CHKERRQ(ierr); 865b2f1ab58SBarry Smith iwork[j] |= marker; 866b2f1ab58SBarry Smith } 867b2f1ab58SBarry Smith } 868*6322f4bdSBarry Smith } else { 869c328eeadSBarry Smith /* Mark the columns reached */ 870*6322f4bdSBarry Smith for (i=0; i<nnz;i++) { 871b2f1ab58SBarry Smith j = row + in_matrix->icols[row][i]; 872b2f1ab58SBarry Smith if (minmrk<=j && j<maxmrk) { iwork[j] |= 1; } 873b2f1ab58SBarry Smith } 874b2f1ab58SBarry Smith } 875b2f1ab58SBarry Smith PetscFunctionReturn(0); 876b2f1ab58SBarry Smith } 877b2f1ab58SBarry Smith 878b2f1ab58SBarry Smith 879b2f1ab58SBarry Smith /* 880b2f1ab58SBarry Smith spbas_power 881b2f1ab58SBarry Smith Calculate sparseness patterns for incomplete Cholesky decompositions 882b2f1ab58SBarry Smith of a given order: (almost) all nonzeros of the matrix^(order+1) which 883b2f1ab58SBarry Smith are inside the band width are found and stored in the output sparseness 884b2f1ab58SBarry Smith pattern. 885b2f1ab58SBarry Smith */ 886b2f1ab58SBarry Smith #undef __FUNCT__ 887b2f1ab58SBarry Smith #define __FUNCT__ "spbas_power" 888b2f1ab58SBarry Smith PetscErrorCode spbas_power (spbas_matrix in_matrix,PetscInt power, spbas_matrix * result) 889b2f1ab58SBarry Smith { 890b2f1ab58SBarry Smith spbas_matrix retval; 891b2f1ab58SBarry Smith PetscInt nrows = in_matrix.nrows; 892b2f1ab58SBarry Smith PetscInt ncols = in_matrix.ncols; 893b2f1ab58SBarry Smith PetscInt i, j, kend; 894b2f1ab58SBarry Smith PetscInt nnz, inz; 895b2f1ab58SBarry Smith PetscInt *iwork; 896b2f1ab58SBarry Smith PetscInt marker; 897b2f1ab58SBarry Smith PetscInt maxmrk=0; 898b2f1ab58SBarry Smith PetscErrorCode ierr; 899b2f1ab58SBarry Smith 900b2f1ab58SBarry Smith PetscFunctionBegin; 901*6322f4bdSBarry Smith if (in_matrix.col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ( PETSC_ERR_SUP_SYS,"must have diagonal offsets in pattern\n"); 902*6322f4bdSBarry Smith if (ncols != nrows) SETERRQ( PETSC_ERR_ARG_INCOMP, "Dimension error\n"); 903*6322f4bdSBarry Smith if (in_matrix.values) SETERRQ( PETSC_ERR_ARG_INCOMP, "Input array must be sparseness pattern (no values)"); 904*6322f4bdSBarry Smith if (power<=0) SETERRQ( PETSC_ERR_SUP_SYS, "Power must be 1 or up"); 905b2f1ab58SBarry Smith 906c328eeadSBarry Smith /* Copy input values*/ 907b2f1ab58SBarry Smith retval.nrows = ncols; 908b2f1ab58SBarry Smith retval.ncols = nrows; 909b2f1ab58SBarry Smith retval.nnz = 0; 910b2f1ab58SBarry Smith retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS; 911b2f1ab58SBarry Smith retval.block_data = PETSC_FALSE; 912b2f1ab58SBarry Smith 913c328eeadSBarry Smith /* Allocate sparseness pattern */ 91471d55d18SBarry Smith ierr = spbas_allocate_pattern(&retval, in_matrix.values ? PETSC_TRUE : PETSC_FALSE);CHKERRQ(ierr); 915b2f1ab58SBarry Smith 916c328eeadSBarry Smith /* Allocate marker array */ 917b2f1ab58SBarry Smith ierr = PetscMalloc(nrows * sizeof(PetscInt), &iwork);CHKERRQ(ierr); 918b2f1ab58SBarry Smith 919c328eeadSBarry Smith /* Erase the pattern for this row */ 920b2f1ab58SBarry Smith memset( (void *) iwork, 0, retval.nrows*sizeof(PetscInt)); 921b2f1ab58SBarry Smith 922c328eeadSBarry Smith /* Calculate marker values */ 923b2f1ab58SBarry Smith marker = 1; for (i=1; i<power; i++) {marker*=2;} 924b2f1ab58SBarry Smith 925*6322f4bdSBarry Smith for (i=0; i<nrows; i++) { 926c328eeadSBarry Smith /* Calculate the pattern for each row */ 927b2f1ab58SBarry Smith 928b2f1ab58SBarry Smith nnz = in_matrix.row_nnz[i]; 929b2f1ab58SBarry Smith kend = i+in_matrix.icols[i][nnz-1]; 930b2f1ab58SBarry Smith if (maxmrk<=kend) {maxmrk=kend+1;} 93171d55d18SBarry Smith ierr = spbas_mark_row_power( iwork, i, &in_matrix, marker, i, maxmrk);CHKERRQ(ierr); 932b2f1ab58SBarry Smith 933c328eeadSBarry Smith /* Count the columns*/ 934b2f1ab58SBarry Smith nnz = 0; 935b2f1ab58SBarry Smith for (j=i; j<maxmrk; j++) {nnz+= (iwork[j]!=0);} 936b2f1ab58SBarry Smith 937c328eeadSBarry Smith /* Allocate the column indices */ 938b2f1ab58SBarry Smith retval.row_nnz[i] = nnz; 939b2f1ab58SBarry Smith ierr = PetscMalloc(nnz*sizeof(PetscInt),&retval.icols[i]);CHKERRQ(ierr); 940b2f1ab58SBarry Smith 941c328eeadSBarry Smith /* Administrate the column indices */ 942b2f1ab58SBarry Smith inz = 0; 943*6322f4bdSBarry Smith for (j=i; j<maxmrk; j++) { 944*6322f4bdSBarry Smith if (iwork[j]) { 945b2f1ab58SBarry Smith retval.icols[i][inz] = j-i; 946b2f1ab58SBarry Smith inz++; 947b2f1ab58SBarry Smith iwork[j]=0; 948b2f1ab58SBarry Smith } 949b2f1ab58SBarry Smith } 950b2f1ab58SBarry Smith retval.nnz += nnz; 951b2f1ab58SBarry Smith }; 952b2f1ab58SBarry Smith ierr = PetscFree(iwork);CHKERRQ(ierr); 953b2f1ab58SBarry Smith *result = retval; 954b2f1ab58SBarry Smith PetscFunctionReturn(0); 955b2f1ab58SBarry Smith } 956b2f1ab58SBarry Smith 957b2f1ab58SBarry Smith 958b2f1ab58SBarry Smith 959b2f1ab58SBarry Smith /* 960b2f1ab58SBarry Smith spbas_keep_upper: 961b2f1ab58SBarry Smith remove the lower part of the matrix: keep the upper part 962b2f1ab58SBarry Smith */ 963b2f1ab58SBarry Smith #undef __FUNCT__ 964b2f1ab58SBarry Smith #define __FUNCT__ "spbas_keep_upper" 965b2f1ab58SBarry Smith PetscErrorCode spbas_keep_upper( spbas_matrix * inout_matrix) 966b2f1ab58SBarry Smith { 967b2f1ab58SBarry Smith PetscInt i, j; 968b2f1ab58SBarry Smith PetscInt jstart; 969b2f1ab58SBarry Smith 970b2f1ab58SBarry Smith PetscFunctionBegin; 971*6322f4bdSBarry Smith if (inout_matrix->block_data) SETERRQ( PETSC_ERR_SUP_SYS, "Not yet for block data matrices\n"); 972*6322f4bdSBarry Smith for (i=0; i<inout_matrix->nrows; i++) { 973*6322f4bdSBarry Smith for (jstart=0; (jstart<inout_matrix->row_nnz[i]) && (inout_matrix->icols[i][jstart]<0); jstart++){} 974*6322f4bdSBarry Smith if (jstart>0) { 975*6322f4bdSBarry Smith for (j=0; j<inout_matrix->row_nnz[i]-jstart; j++) { 976*6322f4bdSBarry Smith inout_matrix->icols[i][j] = inout_matrix->icols[i][j+jstart]; 977b2f1ab58SBarry Smith } 978b2f1ab58SBarry Smith 979*6322f4bdSBarry Smith if (inout_matrix->values) { 980*6322f4bdSBarry Smith for (j=0; j<inout_matrix->row_nnz[i]-jstart; j++) { 981*6322f4bdSBarry Smith inout_matrix->values[i][j] = inout_matrix->values[i][j+jstart]; 982b2f1ab58SBarry Smith } 983b2f1ab58SBarry Smith } 984b2f1ab58SBarry Smith 985b2f1ab58SBarry Smith inout_matrix->row_nnz[i] -= jstart; 986b2f1ab58SBarry Smith 987*6322f4bdSBarry Smith inout_matrix->icols[i] = (PetscInt*) realloc((void*) inout_matrix->icols[i], inout_matrix->row_nnz[i]*sizeof(PetscInt)); 988b2f1ab58SBarry Smith 989*6322f4bdSBarry Smith if (inout_matrix->values) { 990*6322f4bdSBarry Smith inout_matrix->values[i] = (PetscScalar*) realloc((void*) inout_matrix->values[i], inout_matrix->row_nnz[i]*sizeof(PetscScalar)); 991b2f1ab58SBarry Smith } 992b2f1ab58SBarry Smith inout_matrix->nnz -= jstart; 993b2f1ab58SBarry Smith } 994b2f1ab58SBarry Smith } 995b2f1ab58SBarry Smith PetscFunctionReturn(0); 996b2f1ab58SBarry Smith } 997b2f1ab58SBarry Smith 998