1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/inline/spops.h" 9 #include "src/mat/impls/sbaij/seq/sbaij.h" 10 11 #define CHUNKSIZE 10 12 13 /* 14 Checks for missing diagonals 15 */ 16 #undef __FUNCT__ 17 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 18 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A) 19 { 20 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 21 PetscErrorCode ierr; 22 PetscInt *diag,*jj = a->j,i; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for (i=0; i<a->mbs; i++) { 28 if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i); 29 } 30 PetscFunctionReturn(0); 31 } 32 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 35 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A) 36 { 37 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 38 PetscErrorCode ierr; 39 PetscInt i; 40 41 PetscFunctionBegin; 42 if (!a->diag) { 43 ierr = PetscMalloc(a->mbs*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 44 } 45 for (i=0; i<a->mbs; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 #undef __FUNCT__ 50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 51 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 52 { 53 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 54 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs; 55 PetscErrorCode ierr; 56 57 PetscFunctionBegin; 58 *nn = n; 59 if (!ia) PetscFunctionReturn(0); 60 if (!blockcompressed) { 61 /* malloc & create the natural set of indices */ 62 ierr = PetscMalloc2((n+1)*bs,PetscInt,ia,nz*bs,PetscInt,ja);CHKERRQ(ierr); 63 for (i=0; i<n+1; i++) { 64 for (j=0; j<bs; j++) { 65 *ia[i*bs+j] = a->i[i]*bs+j+oshift; 66 } 67 } 68 for (i=0; i<nz; i++) { 69 for (j=0; j<bs; j++) { 70 *ja[i*bs+j] = a->j[i]*bs+j+oshift; 71 } 72 } 73 } else { /* blockcompressed */ 74 if (oshift == 1) { 75 /* temporarily add 1 to i and j indices */ 76 for (i=0; i<nz; i++) a->j[i]++; 77 for (i=0; i<n+1; i++) a->i[i]++; 78 } 79 *ia = a->i; *ja = a->j; 80 } 81 82 PetscFunctionReturn(0); 83 } 84 85 #undef __FUNCT__ 86 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 87 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 88 { 89 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 90 PetscInt i,n = a->mbs,nz = a->i[n]; 91 PetscErrorCode ierr; 92 93 PetscFunctionBegin; 94 if (!ia) PetscFunctionReturn(0); 95 96 if (!blockcompressed) { 97 ierr = PetscFree2(*ia,*ja);CHKERRQ(ierr); 98 } else if (oshift == 1) { /* blockcompressed */ 99 for (i=0; i<nz; i++) a->j[i]--; 100 for (i=0; i<n+1; i++) a->i[i]--; 101 } 102 103 PetscFunctionReturn(0); 104 } 105 106 #undef __FUNCT__ 107 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 108 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A) 109 { 110 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 111 PetscErrorCode ierr; 112 113 PetscFunctionBegin; 114 #if defined(PETSC_USE_LOG) 115 PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->rmap.N,a->nz); 116 #endif 117 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 118 if (a->row) {ierr = ISDestroy(a->row);CHKERRQ(ierr);} 119 if (a->col){ierr = ISDestroy(a->col);CHKERRQ(ierr);} 120 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 121 ierr = PetscFree(a->diag);CHKERRQ(ierr); 122 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 123 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 124 ierr = PetscFree(a->relax_work);CHKERRQ(ierr); 125 ierr = PetscFree(a->solves_work);CHKERRQ(ierr); 126 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 127 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 128 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 129 130 ierr = PetscFree(a->inew);CHKERRQ(ierr); 131 ierr = PetscFree(a);CHKERRQ(ierr); 132 133 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 134 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 135 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 136 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 137 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 138 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr); 139 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 140 PetscFunctionReturn(0); 141 } 142 143 #undef __FUNCT__ 144 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 145 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op,PetscTruth flg) 146 { 147 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 148 PetscErrorCode ierr; 149 150 PetscFunctionBegin; 151 switch (op) { 152 case MAT_ROW_ORIENTED: 153 a->roworiented = flg; 154 break; 155 case MAT_COLUMNS_SORTED: 156 a->sorted = flg; 157 break; 158 case MAT_KEEP_ZEROED_ROWS: 159 a->keepzeroedrows = flg; 160 break; 161 case MAT_NO_NEW_NONZERO_LOCATIONS: 162 a->nonew = (flg ? 1 : 0); 163 break; 164 case MAT_NEW_NONZERO_LOCATION_ERR: 165 a->nonew = (flg ? -1 : 0); 166 break; 167 case MAT_NEW_NONZERO_ALLOCATION_ERR: 168 a->nonew = (flg ? -2 : 0); 169 break; 170 case MAT_ROWS_SORTED: 171 case MAT_NEW_DIAGONALS: 172 case MAT_IGNORE_OFF_PROC_ENTRIES: 173 case MAT_USE_HASH_TABLE: 174 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 175 break; 176 case MAT_HERMITIAN: 177 if (flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 178 case MAT_SYMMETRIC: 179 case MAT_STRUCTURALLY_SYMMETRIC: 180 case MAT_SYMMETRY_ETERNAL: 181 if (!flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 182 ierr = PetscInfo1(A,"Option %s not relevent\n",MatOptions[op]);CHKERRQ(ierr); 183 break; 184 case MAT_IGNORE_LOWER_TRIANGULAR: 185 a->ignore_ltriangular = flg; 186 break; 187 case MAT_ERROR_LOWER_TRIANGULAR: 188 a->ignore_ltriangular = flg; 189 break; 190 case MAT_GETROW_UPPERTRIANGULAR: 191 a->getrow_utriangular = flg; 192 break; 193 default: 194 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 195 } 196 PetscFunctionReturn(0); 197 } 198 199 #undef __FUNCT__ 200 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 201 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v) 202 { 203 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 204 PetscErrorCode ierr; 205 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 206 MatScalar *aa,*aa_i; 207 PetscScalar *v_i; 208 209 PetscFunctionBegin; 210 if (A && !a->getrow_utriangular) SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format. Getting the upper triangular part of row, run with -mat_getrow_uppertriangular, call MatSetOption(mat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE) or MatGetRowUpperTriangular()"); 211 /* Get the upper triangular part of the row */ 212 bs = A->rmap.bs; 213 ai = a->i; 214 aj = a->j; 215 aa = a->a; 216 bs2 = a->bs2; 217 218 if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row); 219 220 bn = row/bs; /* Block number */ 221 bp = row % bs; /* Block position */ 222 M = ai[bn+1] - ai[bn]; 223 *ncols = bs*M; 224 225 if (v) { 226 *v = 0; 227 if (*ncols) { 228 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 229 for (i=0; i<M; i++) { /* for each block in the block row */ 230 v_i = *v + i*bs; 231 aa_i = aa + bs2*(ai[bn] + i); 232 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 233 } 234 } 235 } 236 237 if (cols) { 238 *cols = 0; 239 if (*ncols) { 240 ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr); 241 for (i=0; i<M; i++) { /* for each block in the block row */ 242 cols_i = *cols + i*bs; 243 itmp = bs*aj[ai[bn] + i]; 244 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 245 } 246 } 247 } 248 249 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 250 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 251 #ifdef column_search 252 v_i = *v + M*bs; 253 cols_i = *cols + M*bs; 254 for (i=0; i<bn; i++){ /* for each block row */ 255 M = ai[i+1] - ai[i]; 256 for (j=0; j<M; j++){ 257 itmp = aj[ai[i] + j]; /* block column value */ 258 if (itmp == bn){ 259 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 260 for (k=0; k<bs; k++) { 261 *cols_i++ = i*bs+k; 262 *v_i++ = aa_i[k]; 263 } 264 *ncols += bs; 265 break; 266 } 267 } 268 } 269 #endif 270 PetscFunctionReturn(0); 271 } 272 273 #undef __FUNCT__ 274 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 275 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 276 { 277 PetscErrorCode ierr; 278 279 PetscFunctionBegin; 280 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 281 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 282 PetscFunctionReturn(0); 283 } 284 285 #undef __FUNCT__ 286 #define __FUNCT__ "MatGetRowUpperTriangular_SeqSBAIJ" 287 PetscErrorCode MatGetRowUpperTriangular_SeqSBAIJ(Mat A) 288 { 289 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 290 291 PetscFunctionBegin; 292 a->getrow_utriangular = PETSC_TRUE; 293 PetscFunctionReturn(0); 294 } 295 #undef __FUNCT__ 296 #define __FUNCT__ "MatRestoreRowUpperTriangular_SeqSBAIJ" 297 PetscErrorCode MatRestoreRowUpperTriangular_SeqSBAIJ(Mat A) 298 { 299 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 300 301 PetscFunctionBegin; 302 a->getrow_utriangular = PETSC_FALSE; 303 PetscFunctionReturn(0); 304 } 305 306 #undef __FUNCT__ 307 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 308 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B) 309 { 310 PetscErrorCode ierr; 311 PetscFunctionBegin; 312 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 313 PetscFunctionReturn(0); 314 } 315 316 #undef __FUNCT__ 317 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 318 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 319 { 320 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 321 PetscErrorCode ierr; 322 PetscInt i,j,bs = A->rmap.bs,k,l,bs2=a->bs2; 323 const char *name; 324 PetscViewerFormat format; 325 326 PetscFunctionBegin; 327 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 328 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 329 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 330 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 331 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 332 if (A->factor && bs>1){ 333 ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: matrix is factored with bs>1. MatView() with PETSC_VIEWER_ASCII_MATLAB is not supported and ignored!\n");CHKERRQ(ierr); 334 PetscFunctionReturn(0); 335 } 336 Mat aij; 337 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 338 ierr = MatView(aij,viewer);CHKERRQ(ierr); 339 ierr = MatDestroy(aij);CHKERRQ(ierr); 340 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 341 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 342 for (i=0; i<a->mbs; i++) { 343 for (j=0; j<bs; j++) { 344 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 345 for (k=a->i[i]; k<a->i[i+1]; k++) { 346 for (l=0; l<bs; l++) { 347 #if defined(PETSC_USE_COMPLEX) 348 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 349 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 350 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 351 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 352 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 353 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 354 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 355 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 356 } 357 #else 358 if (a->a[bs2*k + l*bs + j] != 0.0) { 359 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 360 } 361 #endif 362 } 363 } 364 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 365 } 366 } 367 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 368 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 369 PetscFunctionReturn(0); 370 } else { 371 if (A->factor && bs>1){ 372 ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: matrix is factored. MatView_SeqSBAIJ_ASCII() may not display complete or logically correct entries!\n");CHKERRQ(ierr); 373 } 374 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 375 for (i=0; i<a->mbs; i++) { 376 for (j=0; j<bs; j++) { 377 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 378 for (k=a->i[i]; k<a->i[i+1]; k++) { 379 for (l=0; l<bs; l++) { 380 #if defined(PETSC_USE_COMPLEX) 381 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 382 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 383 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 384 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 385 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 386 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 387 } else { 388 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 389 } 390 #else 391 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 392 #endif 393 } 394 } 395 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 396 } 397 } 398 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 399 } 400 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 401 PetscFunctionReturn(0); 402 } 403 404 #undef __FUNCT__ 405 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 406 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 407 { 408 Mat A = (Mat) Aa; 409 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 410 PetscErrorCode ierr; 411 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2; 412 PetscMPIInt rank; 413 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 414 MatScalar *aa; 415 MPI_Comm comm; 416 PetscViewer viewer; 417 418 PetscFunctionBegin; 419 /* 420 This is nasty. If this is called from an originally parallel matrix 421 then all processes call this,but only the first has the matrix so the 422 rest should return immediately. 423 */ 424 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 425 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 426 if (rank) PetscFunctionReturn(0); 427 428 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 429 430 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 431 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 432 433 /* loop over matrix elements drawing boxes */ 434 color = PETSC_DRAW_BLUE; 435 for (i=0,row=0; i<mbs; i++,row+=bs) { 436 for (j=a->i[i]; j<a->i[i+1]; j++) { 437 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 438 x_l = a->j[j]*bs; x_r = x_l + 1.0; 439 aa = a->a + j*bs2; 440 for (k=0; k<bs; k++) { 441 for (l=0; l<bs; l++) { 442 if (PetscRealPart(*aa++) >= 0.) continue; 443 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 444 } 445 } 446 } 447 } 448 color = PETSC_DRAW_CYAN; 449 for (i=0,row=0; i<mbs; i++,row+=bs) { 450 for (j=a->i[i]; j<a->i[i+1]; j++) { 451 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 452 x_l = a->j[j]*bs; x_r = x_l + 1.0; 453 aa = a->a + j*bs2; 454 for (k=0; k<bs; k++) { 455 for (l=0; l<bs; l++) { 456 if (PetscRealPart(*aa++) != 0.) continue; 457 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 458 } 459 } 460 } 461 } 462 463 color = PETSC_DRAW_RED; 464 for (i=0,row=0; i<mbs; i++,row+=bs) { 465 for (j=a->i[i]; j<a->i[i+1]; j++) { 466 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 467 x_l = a->j[j]*bs; x_r = x_l + 1.0; 468 aa = a->a + j*bs2; 469 for (k=0; k<bs; k++) { 470 for (l=0; l<bs; l++) { 471 if (PetscRealPart(*aa++) <= 0.) continue; 472 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 473 } 474 } 475 } 476 } 477 PetscFunctionReturn(0); 478 } 479 480 #undef __FUNCT__ 481 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 482 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 483 { 484 PetscErrorCode ierr; 485 PetscReal xl,yl,xr,yr,w,h; 486 PetscDraw draw; 487 PetscTruth isnull; 488 489 PetscFunctionBegin; 490 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 491 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 492 493 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 494 xr = A->rmap.N; yr = A->rmap.N; h = yr/10.0; w = xr/10.0; 495 xr += w; yr += h; xl = -w; yl = -h; 496 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 497 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 498 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 499 PetscFunctionReturn(0); 500 } 501 502 #undef __FUNCT__ 503 #define __FUNCT__ "MatView_SeqSBAIJ" 504 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 505 { 506 PetscErrorCode ierr; 507 PetscTruth iascii,isdraw; 508 509 PetscFunctionBegin; 510 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 511 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 512 if (iascii){ 513 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 514 } else if (isdraw) { 515 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 516 } else { 517 Mat B; 518 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 519 ierr = MatView(B,viewer);CHKERRQ(ierr); 520 ierr = MatDestroy(B);CHKERRQ(ierr); 521 } 522 PetscFunctionReturn(0); 523 } 524 525 526 #undef __FUNCT__ 527 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 528 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 529 { 530 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 531 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 532 PetscInt *ai = a->i,*ailen = a->ilen; 533 PetscInt brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2; 534 MatScalar *ap,*aa = a->a; 535 536 PetscFunctionBegin; 537 for (k=0; k<m; k++) { /* loop over rows */ 538 row = im[k]; brow = row/bs; 539 if (row < 0) {v += n; continue;} /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); */ 540 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 541 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 542 nrow = ailen[brow]; 543 for (l=0; l<n; l++) { /* loop over columns */ 544 if (in[l] < 0) {v++; continue;} /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); */ 545 if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1); 546 col = in[l] ; 547 bcol = col/bs; 548 cidx = col%bs; 549 ridx = row%bs; 550 high = nrow; 551 low = 0; /* assume unsorted */ 552 while (high-low > 5) { 553 t = (low+high)/2; 554 if (rp[t] > bcol) high = t; 555 else low = t; 556 } 557 for (i=low; i<high; i++) { 558 if (rp[i] > bcol) break; 559 if (rp[i] == bcol) { 560 *v++ = ap[bs2*i+bs*cidx+ridx]; 561 goto finished; 562 } 563 } 564 *v++ = 0.0; 565 finished:; 566 } 567 } 568 PetscFunctionReturn(0); 569 } 570 571 572 #undef __FUNCT__ 573 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 574 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 575 { 576 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 577 PetscErrorCode ierr; 578 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 579 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 580 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval; 581 PetscTruth roworiented=a->roworiented; 582 const MatScalar *value = v; 583 MatScalar *ap,*aa = a->a,*bap; 584 585 PetscFunctionBegin; 586 if (roworiented) { 587 stepval = (n-1)*bs; 588 } else { 589 stepval = (m-1)*bs; 590 } 591 for (k=0; k<m; k++) { /* loop over added rows */ 592 row = im[k]; 593 if (row < 0) continue; 594 #if defined(PETSC_USE_DEBUG) 595 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 596 #endif 597 rp = aj + ai[row]; 598 ap = aa + bs2*ai[row]; 599 rmax = imax[row]; 600 nrow = ailen[row]; 601 low = 0; 602 high = nrow; 603 for (l=0; l<n; l++) { /* loop over added columns */ 604 if (in[l] < 0) continue; 605 col = in[l]; 606 #if defined(PETSC_USE_DEBUG) 607 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1); 608 #endif 609 if (col < row) continue; /* ignore lower triangular block */ 610 if (roworiented) { 611 value = v + k*(stepval+bs)*bs + l*bs; 612 } else { 613 value = v + l*(stepval+bs)*bs + k*bs; 614 } 615 if (col <= lastcol) low = 0; else high = nrow; 616 lastcol = col; 617 while (high-low > 7) { 618 t = (low+high)/2; 619 if (rp[t] > col) high = t; 620 else low = t; 621 } 622 for (i=low; i<high; i++) { 623 if (rp[i] > col) break; 624 if (rp[i] == col) { 625 bap = ap + bs2*i; 626 if (roworiented) { 627 if (is == ADD_VALUES) { 628 for (ii=0; ii<bs; ii++,value+=stepval) { 629 for (jj=ii; jj<bs2; jj+=bs) { 630 bap[jj] += *value++; 631 } 632 } 633 } else { 634 for (ii=0; ii<bs; ii++,value+=stepval) { 635 for (jj=ii; jj<bs2; jj+=bs) { 636 bap[jj] = *value++; 637 } 638 } 639 } 640 } else { 641 if (is == ADD_VALUES) { 642 for (ii=0; ii<bs; ii++,value+=stepval) { 643 for (jj=0; jj<bs; jj++) { 644 *bap++ += *value++; 645 } 646 } 647 } else { 648 for (ii=0; ii<bs; ii++,value+=stepval) { 649 for (jj=0; jj<bs; jj++) { 650 *bap++ = *value++; 651 } 652 } 653 } 654 } 655 goto noinsert2; 656 } 657 } 658 if (nonew == 1) goto noinsert2; 659 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 660 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 661 N = nrow++ - 1; high++; 662 /* shift up all the later entries in this row */ 663 for (ii=N; ii>=i; ii--) { 664 rp[ii+1] = rp[ii]; 665 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 666 } 667 if (N >= i) { 668 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 669 } 670 rp[i] = col; 671 bap = ap + bs2*i; 672 if (roworiented) { 673 for (ii=0; ii<bs; ii++,value+=stepval) { 674 for (jj=ii; jj<bs2; jj+=bs) { 675 bap[jj] = *value++; 676 } 677 } 678 } else { 679 for (ii=0; ii<bs; ii++,value+=stepval) { 680 for (jj=0; jj<bs; jj++) { 681 *bap++ = *value++; 682 } 683 } 684 } 685 noinsert2:; 686 low = i; 687 } 688 ailen[row] = nrow; 689 } 690 PetscFunctionReturn(0); 691 } 692 693 #undef __FUNCT__ 694 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 695 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 696 { 697 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 698 PetscErrorCode ierr; 699 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 700 PetscInt m = A->rmap.N,*ip,N,*ailen = a->ilen; 701 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 702 MatScalar *aa = a->a,*ap; 703 704 PetscFunctionBegin; 705 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 706 707 if (m) rmax = ailen[0]; 708 for (i=1; i<mbs; i++) { 709 /* move each row back by the amount of empty slots (fshift) before it*/ 710 fshift += imax[i-1] - ailen[i-1]; 711 rmax = PetscMax(rmax,ailen[i]); 712 if (fshift) { 713 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 714 N = ailen[i]; 715 for (j=0; j<N; j++) { 716 ip[j-fshift] = ip[j]; 717 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 718 } 719 } 720 ai[i] = ai[i-1] + ailen[i-1]; 721 } 722 if (mbs) { 723 fshift += imax[mbs-1] - ailen[mbs-1]; 724 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 725 } 726 /* reset ilen and imax for each row */ 727 for (i=0; i<mbs; i++) { 728 ailen[i] = imax[i] = ai[i+1] - ai[i]; 729 } 730 a->nz = ai[mbs]; 731 732 /* diagonals may have moved, reset it */ 733 if (a->diag) { 734 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 735 } 736 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->rmap.N,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 737 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 738 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 739 a->reallocs = 0; 740 A->info.nz_unneeded = (PetscReal)fshift*bs2; 741 PetscFunctionReturn(0); 742 } 743 744 /* 745 This function returns an array of flags which indicate the locations of contiguous 746 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 747 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 748 Assume: sizes should be long enough to hold all the values. 749 */ 750 #undef __FUNCT__ 751 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 752 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 753 { 754 PetscInt i,j,k,row; 755 PetscTruth flg; 756 757 PetscFunctionBegin; 758 for (i=0,j=0; i<n; j++) { 759 row = idx[i]; 760 if (row%bs!=0) { /* Not the begining of a block */ 761 sizes[j] = 1; 762 i++; 763 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 764 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 765 i++; 766 } else { /* Begining of the block, so check if the complete block exists */ 767 flg = PETSC_TRUE; 768 for (k=1; k<bs; k++) { 769 if (row+k != idx[i+k]) { /* break in the block */ 770 flg = PETSC_FALSE; 771 break; 772 } 773 } 774 if (flg) { /* No break in the bs */ 775 sizes[j] = bs; 776 i+= bs; 777 } else { 778 sizes[j] = 1; 779 i++; 780 } 781 } 782 } 783 *bs_max = j; 784 PetscFunctionReturn(0); 785 } 786 787 788 /* Only add/insert a(i,j) with i<=j (blocks). 789 Any a(i,j) with i>j input by user is ingored. 790 */ 791 792 #undef __FUNCT__ 793 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 794 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 795 { 796 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 797 PetscErrorCode ierr; 798 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 799 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 800 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol; 801 PetscInt ridx,cidx,bs2=a->bs2; 802 MatScalar *ap,value,*aa=a->a,*bap; 803 804 PetscFunctionBegin; 805 for (k=0; k<m; k++) { /* loop over added rows */ 806 row = im[k]; /* row number */ 807 brow = row/bs; /* block row number */ 808 if (row < 0) continue; 809 #if defined(PETSC_USE_DEBUG) 810 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 811 #endif 812 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 813 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 814 rmax = imax[brow]; /* maximum space allocated for this row */ 815 nrow = ailen[brow]; /* actual length of this row */ 816 low = 0; 817 818 for (l=0; l<n; l++) { /* loop over added columns */ 819 if (in[l] < 0) continue; 820 #if defined(PETSC_USE_DEBUG) 821 if (in[l] >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->rmap.N-1); 822 #endif 823 col = in[l]; 824 bcol = col/bs; /* block col number */ 825 826 if (brow > bcol) { 827 if (a->ignore_ltriangular){ 828 continue; /* ignore lower triangular values */ 829 } else { 830 SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 831 } 832 } 833 834 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 835 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 836 /* element value a(k,l) */ 837 if (roworiented) { 838 value = v[l + k*n]; 839 } else { 840 value = v[k + l*m]; 841 } 842 843 /* move pointer bap to a(k,l) quickly and add/insert value */ 844 if (col <= lastcol) low = 0; high = nrow; 845 lastcol = col; 846 while (high-low > 7) { 847 t = (low+high)/2; 848 if (rp[t] > bcol) high = t; 849 else low = t; 850 } 851 for (i=low; i<high; i++) { 852 if (rp[i] > bcol) break; 853 if (rp[i] == bcol) { 854 bap = ap + bs2*i + bs*cidx + ridx; 855 if (is == ADD_VALUES) *bap += value; 856 else *bap = value; 857 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 858 if (brow == bcol && ridx < cidx){ 859 bap = ap + bs2*i + bs*ridx + cidx; 860 if (is == ADD_VALUES) *bap += value; 861 else *bap = value; 862 } 863 goto noinsert1; 864 } 865 } 866 867 if (nonew == 1) goto noinsert1; 868 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 869 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 870 871 N = nrow++ - 1; high++; 872 /* shift up all the later entries in this row */ 873 for (ii=N; ii>=i; ii--) { 874 rp[ii+1] = rp[ii]; 875 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 876 } 877 if (N>=i) { 878 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 879 } 880 rp[i] = bcol; 881 ap[bs2*i + bs*cidx + ridx] = value; 882 noinsert1:; 883 low = i; 884 } 885 } /* end of loop over added columns */ 886 ailen[brow] = nrow; 887 } /* end of loop over added rows */ 888 PetscFunctionReturn(0); 889 } 890 891 #undef __FUNCT__ 892 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 893 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 894 { 895 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 896 Mat outA; 897 PetscErrorCode ierr; 898 PetscTruth row_identity; 899 900 PetscFunctionBegin; 901 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels=0 is supported for in-place icc"); 902 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 903 if (!row_identity) SETERRQ(PETSC_ERR_SUP,"Matrix reordering is not supported"); 904 if (inA->rmap.bs != 1) SETERRQ1(PETSC_ERR_SUP,"Matrix block size %D is not supported",inA->rmap.bs); /* Need to replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR()! */ 905 906 outA = inA; 907 inA->factor = FACTOR_CHOLESKY; 908 909 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 910 /* 911 Blocksize < 8 have a special faster factorization/solver 912 for ICC(0) factorization with natural ordering 913 */ 914 switch (inA->rmap.bs){ /* Note: row_identity = PETSC_TRUE! */ 915 case 1: 916 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 917 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 918 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 919 inA->ops->solves = MatSolves_SeqSBAIJ_1; 920 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 921 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 922 ierr = PetscInfo(inA,"Using special in-place natural ordering solvetrans BS=1\n");CHKERRQ(ierr); 923 break; 924 case 2: 925 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 926 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 927 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 928 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering; 929 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering; 930 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=2\n");CHKERRQ(ierr); 931 break; 932 case 3: 933 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 934 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 935 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 936 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_3_NaturalOrdering; 937 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering; 938 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=3\n");CHKERRQ(ierr); 939 break; 940 case 4: 941 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 942 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 943 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 944 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_4_NaturalOrdering; 945 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering; 946 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=4\n");CHKERRQ(ierr); 947 break; 948 case 5: 949 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 950 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 951 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 952 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_5_NaturalOrdering; 953 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering; 954 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=5\n");CHKERRQ(ierr); 955 break; 956 case 6: 957 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 958 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 959 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 960 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_6_NaturalOrdering; 961 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering; 962 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=6\n");CHKERRQ(ierr); 963 break; 964 case 7: 965 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 966 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 967 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 968 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_7_NaturalOrdering; 969 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering; 970 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=7\n");CHKERRQ(ierr); 971 break; 972 default: 973 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 974 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering; 975 inA->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 976 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering; 977 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering; 978 break; 979 } 980 981 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 982 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 983 a->row = row; 984 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 985 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 986 a->col = row; 987 988 /* Create the invert permutation so that it can be used in MatCholeskyFactorNumeric() */ 989 if (a->icol) {ierr = ISInvertPermutation(row,PETSC_DECIDE, &a->icol);CHKERRQ(ierr);} 990 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 991 992 if (!a->solve_work) { 993 ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 994 ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr); 995 } 996 997 ierr = MatCholeskyFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 998 PetscFunctionReturn(0); 999 } 1000 1001 EXTERN_C_BEGIN 1002 #undef __FUNCT__ 1003 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1004 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices) 1005 { 1006 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1007 PetscInt i,nz,n; 1008 1009 PetscFunctionBegin; 1010 nz = baij->maxnz; 1011 n = mat->cmap.n; 1012 for (i=0; i<nz; i++) { 1013 baij->j[i] = indices[i]; 1014 } 1015 baij->nz = nz; 1016 for (i=0; i<n; i++) { 1017 baij->ilen[i] = baij->imax[i]; 1018 } 1019 PetscFunctionReturn(0); 1020 } 1021 EXTERN_C_END 1022 1023 #undef __FUNCT__ 1024 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1025 /*@ 1026 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1027 in the matrix. 1028 1029 Input Parameters: 1030 + mat - the SeqSBAIJ matrix 1031 - indices - the column indices 1032 1033 Level: advanced 1034 1035 Notes: 1036 This can be called if you have precomputed the nonzero structure of the 1037 matrix and want to provide it to the matrix object to improve the performance 1038 of the MatSetValues() operation. 1039 1040 You MUST have set the correct numbers of nonzeros per row in the call to 1041 MatCreateSeqSBAIJ(), and the columns indices MUST be sorted. 1042 1043 MUST be called before any calls to MatSetValues() 1044 1045 .seealso: MatCreateSeqSBAIJ 1046 @*/ 1047 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1048 { 1049 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1050 1051 PetscFunctionBegin; 1052 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1053 PetscValidPointer(indices,2); 1054 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1055 if (f) { 1056 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1057 } else { 1058 SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices"); 1059 } 1060 PetscFunctionReturn(0); 1061 } 1062 1063 #undef __FUNCT__ 1064 #define __FUNCT__ "MatCopy_SeqSBAIJ" 1065 PetscErrorCode MatCopy_SeqSBAIJ(Mat A,Mat B,MatStructure str) 1066 { 1067 PetscErrorCode ierr; 1068 1069 PetscFunctionBegin; 1070 /* If the two matrices have the same copy implementation, use fast copy. */ 1071 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1072 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1073 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1074 1075 if (a->i[A->rmap.N] != b->i[B->rmap.N]) { 1076 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1077 } 1078 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr); 1079 } else { 1080 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1081 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1082 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1083 } 1084 PetscFunctionReturn(0); 1085 } 1086 1087 #undef __FUNCT__ 1088 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1089 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A) 1090 { 1091 PetscErrorCode ierr; 1092 1093 PetscFunctionBegin; 1094 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 1095 PetscFunctionReturn(0); 1096 } 1097 1098 #undef __FUNCT__ 1099 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1100 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1101 { 1102 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1103 PetscFunctionBegin; 1104 *array = a->a; 1105 PetscFunctionReturn(0); 1106 } 1107 1108 #undef __FUNCT__ 1109 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1110 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1111 { 1112 PetscFunctionBegin; 1113 PetscFunctionReturn(0); 1114 } 1115 1116 #include "petscblaslapack.h" 1117 #undef __FUNCT__ 1118 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1119 PetscErrorCode MatAXPY_SeqSBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1120 { 1121 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1122 PetscErrorCode ierr; 1123 PetscInt i,bs=Y->rmap.bs,bs2,j; 1124 PetscBLASInt bnz = (PetscBLASInt)x->nz,one = 1; 1125 1126 PetscFunctionBegin; 1127 if (str == SAME_NONZERO_PATTERN) { 1128 PetscScalar alpha = a; 1129 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1130 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1131 if (y->xtoy && y->XtoY != X) { 1132 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1133 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1134 } 1135 if (!y->xtoy) { /* get xtoy */ 1136 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1137 y->XtoY = X; 1138 } 1139 bs2 = bs*bs; 1140 for (i=0; i<x->nz; i++) { 1141 j = 0; 1142 while (j < bs2){ 1143 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 1144 j++; 1145 } 1146 } 1147 ierr = PetscInfo3(0,"ratio of nnz_s(X)/nnz_s(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 1148 } else { 1149 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1150 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1151 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1152 } 1153 PetscFunctionReturn(0); 1154 } 1155 1156 #undef __FUNCT__ 1157 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1158 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1159 { 1160 PetscFunctionBegin; 1161 *flg = PETSC_TRUE; 1162 PetscFunctionReturn(0); 1163 } 1164 1165 #undef __FUNCT__ 1166 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1167 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1168 { 1169 PetscFunctionBegin; 1170 *flg = PETSC_TRUE; 1171 PetscFunctionReturn(0); 1172 } 1173 1174 #undef __FUNCT__ 1175 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1176 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1177 { 1178 PetscFunctionBegin; 1179 *flg = PETSC_FALSE; 1180 PetscFunctionReturn(0); 1181 } 1182 1183 #undef __FUNCT__ 1184 #define __FUNCT__ "MatRealPart_SeqSBAIJ" 1185 PetscErrorCode MatRealPart_SeqSBAIJ(Mat A) 1186 { 1187 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1188 PetscInt i,nz = a->bs2*a->i[a->mbs]; 1189 PetscScalar *aa = a->a; 1190 1191 PetscFunctionBegin; 1192 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 1193 PetscFunctionReturn(0); 1194 } 1195 1196 #undef __FUNCT__ 1197 #define __FUNCT__ "MatImaginaryPart_SeqSBAIJ" 1198 PetscErrorCode MatImaginaryPart_SeqSBAIJ(Mat A) 1199 { 1200 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1201 PetscInt i,nz = a->bs2*a->i[a->mbs]; 1202 PetscScalar *aa = a->a; 1203 1204 PetscFunctionBegin; 1205 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 1206 PetscFunctionReturn(0); 1207 } 1208 1209 /* -------------------------------------------------------------------*/ 1210 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1211 MatGetRow_SeqSBAIJ, 1212 MatRestoreRow_SeqSBAIJ, 1213 MatMult_SeqSBAIJ_N, 1214 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1215 MatMult_SeqSBAIJ_N, /* transpose versions are same as non-transpose versions */ 1216 MatMultAdd_SeqSBAIJ_N, 1217 MatSolve_SeqSBAIJ_N, 1218 0, 1219 0, 1220 /*10*/ 0, 1221 0, 1222 MatCholeskyFactor_SeqSBAIJ, 1223 MatRelax_SeqSBAIJ, 1224 MatTranspose_SeqSBAIJ, 1225 /*15*/ MatGetInfo_SeqSBAIJ, 1226 MatEqual_SeqSBAIJ, 1227 MatGetDiagonal_SeqSBAIJ, 1228 MatDiagonalScale_SeqSBAIJ, 1229 MatNorm_SeqSBAIJ, 1230 /*20*/ 0, 1231 MatAssemblyEnd_SeqSBAIJ, 1232 0, 1233 MatSetOption_SeqSBAIJ, 1234 MatZeroEntries_SeqSBAIJ, 1235 /*25*/ 0, 1236 0, 1237 0, 1238 MatCholeskyFactorSymbolic_SeqSBAIJ, 1239 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1240 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1241 0, 1242 MatICCFactorSymbolic_SeqSBAIJ, 1243 MatGetArray_SeqSBAIJ, 1244 MatRestoreArray_SeqSBAIJ, 1245 /*35*/ MatDuplicate_SeqSBAIJ, 1246 MatForwardSolve_SeqSBAIJ_N, 1247 MatBackwardSolve_SeqSBAIJ_N, 1248 0, 1249 MatICCFactor_SeqSBAIJ, 1250 /*40*/ MatAXPY_SeqSBAIJ, 1251 MatGetSubMatrices_SeqSBAIJ, 1252 MatIncreaseOverlap_SeqSBAIJ, 1253 MatGetValues_SeqSBAIJ, 1254 MatCopy_SeqSBAIJ, 1255 /*45*/ 0, 1256 MatScale_SeqSBAIJ, 1257 0, 1258 0, 1259 0, 1260 /*50*/ 0, 1261 MatGetRowIJ_SeqSBAIJ, 1262 MatRestoreRowIJ_SeqSBAIJ, 1263 0, 1264 0, 1265 /*55*/ 0, 1266 0, 1267 0, 1268 0, 1269 MatSetValuesBlocked_SeqSBAIJ, 1270 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1271 0, 1272 0, 1273 0, 1274 0, 1275 /*65*/ 0, 1276 0, 1277 0, 1278 0, 1279 0, 1280 /*70*/ MatGetRowMaxAbs_SeqSBAIJ, 1281 0, 1282 0, 1283 0, 1284 0, 1285 /*75*/ 0, 1286 0, 1287 0, 1288 0, 1289 0, 1290 /*80*/ 0, 1291 0, 1292 0, 1293 #if !defined(PETSC_USE_COMPLEX) 1294 MatGetInertia_SeqSBAIJ, 1295 #else 1296 0, 1297 #endif 1298 MatLoad_SeqSBAIJ, 1299 /*85*/ MatIsSymmetric_SeqSBAIJ, 1300 MatIsHermitian_SeqSBAIJ, 1301 MatIsStructurallySymmetric_SeqSBAIJ, 1302 0, 1303 0, 1304 /*90*/ 0, 1305 0, 1306 0, 1307 0, 1308 0, 1309 /*95*/ 0, 1310 0, 1311 0, 1312 0, 1313 0, 1314 /*100*/0, 1315 0, 1316 0, 1317 0, 1318 0, 1319 /*105*/0, 1320 MatRealPart_SeqSBAIJ, 1321 MatImaginaryPart_SeqSBAIJ, 1322 MatGetRowUpperTriangular_SeqSBAIJ, 1323 MatRestoreRowUpperTriangular_SeqSBAIJ 1324 }; 1325 1326 EXTERN_C_BEGIN 1327 #undef __FUNCT__ 1328 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1329 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqSBAIJ(Mat mat) 1330 { 1331 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1332 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 1333 PetscErrorCode ierr; 1334 1335 PetscFunctionBegin; 1336 if (aij->nonew != 1) { 1337 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS,PETSC_TRUE);first"); 1338 } 1339 1340 /* allocate space for values if not already there */ 1341 if (!aij->saved_values) { 1342 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1343 } 1344 1345 /* copy values over */ 1346 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1347 PetscFunctionReturn(0); 1348 } 1349 EXTERN_C_END 1350 1351 EXTERN_C_BEGIN 1352 #undef __FUNCT__ 1353 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1354 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqSBAIJ(Mat mat) 1355 { 1356 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1357 PetscErrorCode ierr; 1358 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 1359 1360 PetscFunctionBegin; 1361 if (aij->nonew != 1) { 1362 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS,PETSC_TRUE);first"); 1363 } 1364 if (!aij->saved_values) { 1365 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1366 } 1367 1368 /* copy values over */ 1369 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1370 PetscFunctionReturn(0); 1371 } 1372 EXTERN_C_END 1373 1374 EXTERN_C_BEGIN 1375 #undef __FUNCT__ 1376 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1377 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 1378 { 1379 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1380 PetscErrorCode ierr; 1381 PetscInt i,mbs,bs2; 1382 PetscTruth skipallocation = PETSC_FALSE,flg; 1383 1384 PetscFunctionBegin; 1385 B->preallocated = PETSC_TRUE; 1386 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1387 B->rmap.bs = B->cmap.bs = bs; 1388 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 1389 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 1390 1391 mbs = B->rmap.N/bs; 1392 bs2 = bs*bs; 1393 1394 if (mbs*bs != B->rmap.N) { 1395 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1396 } 1397 1398 if (nz == MAT_SKIP_ALLOCATION) { 1399 skipallocation = PETSC_TRUE; 1400 nz = 0; 1401 } 1402 1403 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1404 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 1405 if (nnz) { 1406 for (i=0; i<mbs; i++) { 1407 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 1408 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs); 1409 } 1410 } 1411 1412 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1413 if (!flg) { 1414 switch (bs) { 1415 case 1: 1416 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1417 B->ops->solve = MatSolve_SeqSBAIJ_1; 1418 B->ops->solves = MatSolves_SeqSBAIJ_1; 1419 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1420 B->ops->mult = MatMult_SeqSBAIJ_1; 1421 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1422 B->ops->multtranspose = MatMult_SeqSBAIJ_1; 1423 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_1; 1424 break; 1425 case 2: 1426 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1427 B->ops->solve = MatSolve_SeqSBAIJ_2; 1428 B->ops->solvetranspose = MatSolve_SeqSBAIJ_2; 1429 B->ops->mult = MatMult_SeqSBAIJ_2; 1430 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1431 B->ops->multtranspose = MatMult_SeqSBAIJ_2; 1432 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_2; 1433 break; 1434 case 3: 1435 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1436 B->ops->solve = MatSolve_SeqSBAIJ_3; 1437 B->ops->solvetranspose = MatSolve_SeqSBAIJ_3; 1438 B->ops->mult = MatMult_SeqSBAIJ_3; 1439 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1440 B->ops->multtranspose = MatMult_SeqSBAIJ_3; 1441 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_3; 1442 break; 1443 case 4: 1444 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1445 B->ops->solve = MatSolve_SeqSBAIJ_4; 1446 B->ops->solvetranspose = MatSolve_SeqSBAIJ_4; 1447 B->ops->mult = MatMult_SeqSBAIJ_4; 1448 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1449 B->ops->multtranspose = MatMult_SeqSBAIJ_4; 1450 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_4; 1451 break; 1452 case 5: 1453 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1454 B->ops->solve = MatSolve_SeqSBAIJ_5; 1455 B->ops->solvetranspose = MatSolve_SeqSBAIJ_5; 1456 B->ops->mult = MatMult_SeqSBAIJ_5; 1457 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1458 B->ops->multtranspose = MatMult_SeqSBAIJ_5; 1459 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_5; 1460 break; 1461 case 6: 1462 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1463 B->ops->solve = MatSolve_SeqSBAIJ_6; 1464 B->ops->solvetranspose = MatSolve_SeqSBAIJ_6; 1465 B->ops->mult = MatMult_SeqSBAIJ_6; 1466 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1467 B->ops->multtranspose = MatMult_SeqSBAIJ_6; 1468 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_6; 1469 break; 1470 case 7: 1471 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1472 B->ops->solve = MatSolve_SeqSBAIJ_7; 1473 B->ops->solvetranspose = MatSolve_SeqSBAIJ_7; 1474 B->ops->mult = MatMult_SeqSBAIJ_7; 1475 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1476 B->ops->multtranspose = MatMult_SeqSBAIJ_7; 1477 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_7; 1478 break; 1479 } 1480 } 1481 1482 b->mbs = mbs; 1483 b->nbs = mbs; 1484 if (!skipallocation) { 1485 /* b->ilen will count nonzeros in each block row so far. */ 1486 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 1487 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1488 if (!nnz) { 1489 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1490 else if (nz <= 0) nz = 1; 1491 for (i=0; i<mbs; i++) { 1492 b->imax[i] = nz; 1493 } 1494 nz = nz*mbs; /* total nz */ 1495 } else { 1496 nz = 0; 1497 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1498 } 1499 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1500 1501 /* allocate the matrix space */ 1502 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr); 1503 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1504 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1505 b->singlemalloc = PETSC_TRUE; 1506 1507 /* pointer to beginning of each row */ 1508 b->i[0] = 0; 1509 for (i=1; i<mbs+1; i++) { 1510 b->i[i] = b->i[i-1] + b->imax[i-1]; 1511 } 1512 b->free_a = PETSC_TRUE; 1513 b->free_ij = PETSC_TRUE; 1514 } else { 1515 b->free_a = PETSC_FALSE; 1516 b->free_ij = PETSC_FALSE; 1517 } 1518 1519 B->rmap.bs = bs; 1520 b->bs2 = bs2; 1521 b->nz = 0; 1522 b->maxnz = nz*bs2; 1523 1524 b->inew = 0; 1525 b->jnew = 0; 1526 b->anew = 0; 1527 b->a2anew = 0; 1528 b->permute = PETSC_FALSE; 1529 PetscFunctionReturn(0); 1530 } 1531 EXTERN_C_END 1532 1533 EXTERN_C_BEGIN 1534 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 1535 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqBAIJ(Mat, MatType,MatReuse,Mat*); 1536 EXTERN_C_END 1537 1538 /*MC 1539 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1540 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1541 1542 Options Database Keys: 1543 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1544 1545 Level: beginner 1546 1547 .seealso: MatCreateSeqSBAIJ 1548 M*/ 1549 1550 EXTERN_C_BEGIN 1551 #undef __FUNCT__ 1552 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1553 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqSBAIJ(Mat B) 1554 { 1555 Mat_SeqSBAIJ *b; 1556 PetscErrorCode ierr; 1557 PetscMPIInt size; 1558 PetscTruth flg; 1559 1560 PetscFunctionBegin; 1561 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1562 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1563 1564 ierr = PetscNewLog(B,Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1565 B->data = (void*)b; 1566 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1567 B->ops->destroy = MatDestroy_SeqSBAIJ; 1568 B->ops->view = MatView_SeqSBAIJ; 1569 B->factor = 0; 1570 B->mapping = 0; 1571 b->row = 0; 1572 b->icol = 0; 1573 b->reallocs = 0; 1574 b->saved_values = 0; 1575 1576 1577 b->sorted = PETSC_FALSE; 1578 b->roworiented = PETSC_TRUE; 1579 b->nonew = 0; 1580 b->diag = 0; 1581 b->solve_work = 0; 1582 b->mult_work = 0; 1583 B->spptr = 0; 1584 b->keepzeroedrows = PETSC_FALSE; 1585 b->xtoy = 0; 1586 b->XtoY = 0; 1587 1588 b->inew = 0; 1589 b->jnew = 0; 1590 b->anew = 0; 1591 b->a2anew = 0; 1592 b->permute = PETSC_FALSE; 1593 1594 b->ignore_ltriangular = PETSC_FALSE; 1595 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_ignore_lower_triangular",&flg);CHKERRQ(ierr); 1596 if (flg) b->ignore_ltriangular = PETSC_TRUE; 1597 1598 b->getrow_utriangular = PETSC_FALSE; 1599 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_getrow_uppertriangular",&flg);CHKERRQ(ierr); 1600 if (flg) b->getrow_utriangular = PETSC_TRUE; 1601 1602 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1603 "MatStoreValues_SeqSBAIJ", 1604 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1605 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1606 "MatRetrieveValues_SeqSBAIJ", 1607 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1608 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1609 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1610 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1611 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1612 "MatConvert_SeqSBAIJ_SeqAIJ", 1613 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1614 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 1615 "MatConvert_SeqSBAIJ_SeqBAIJ", 1616 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 1617 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1618 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1619 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1620 1621 B->symmetric = PETSC_TRUE; 1622 B->structurally_symmetric = PETSC_TRUE; 1623 B->symmetric_set = PETSC_TRUE; 1624 B->structurally_symmetric_set = PETSC_TRUE; 1625 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSBAIJ);CHKERRQ(ierr); 1626 PetscFunctionReturn(0); 1627 } 1628 EXTERN_C_END 1629 1630 #undef __FUNCT__ 1631 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1632 /*@C 1633 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1634 compressed row) format. For good matrix assembly performance the 1635 user should preallocate the matrix storage by setting the parameter nz 1636 (or the array nnz). By setting these parameters accurately, performance 1637 during matrix assembly can be increased by more than a factor of 50. 1638 1639 Collective on Mat 1640 1641 Input Parameters: 1642 + A - the symmetric matrix 1643 . bs - size of block 1644 . nz - number of block nonzeros per block row (same for all rows) 1645 - nnz - array containing the number of block nonzeros in the upper triangular plus 1646 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1647 1648 Options Database Keys: 1649 . -mat_no_unroll - uses code that does not unroll the loops in the 1650 block calculations (much slower) 1651 . -mat_block_size - size of the blocks to use 1652 1653 Level: intermediate 1654 1655 Notes: 1656 Specify the preallocated storage with either nz or nnz (not both). 1657 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1658 allocation. For additional details, see the users manual chapter on 1659 matrices. 1660 1661 If the nnz parameter is given then the nz parameter is ignored 1662 1663 1664 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1665 @*/ 1666 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 1667 { 1668 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 1669 1670 PetscFunctionBegin; 1671 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1672 if (f) { 1673 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1674 } 1675 PetscFunctionReturn(0); 1676 } 1677 1678 #undef __FUNCT__ 1679 #define __FUNCT__ "MatCreateSeqSBAIJ" 1680 /*@C 1681 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1682 compressed row) format. For good matrix assembly performance the 1683 user should preallocate the matrix storage by setting the parameter nz 1684 (or the array nnz). By setting these parameters accurately, performance 1685 during matrix assembly can be increased by more than a factor of 50. 1686 1687 Collective on MPI_Comm 1688 1689 Input Parameters: 1690 + comm - MPI communicator, set to PETSC_COMM_SELF 1691 . bs - size of block 1692 . m - number of rows, or number of columns 1693 . nz - number of block nonzeros per block row (same for all rows) 1694 - nnz - array containing the number of block nonzeros in the upper triangular plus 1695 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1696 1697 Output Parameter: 1698 . A - the symmetric matrix 1699 1700 Options Database Keys: 1701 . -mat_no_unroll - uses code that does not unroll the loops in the 1702 block calculations (much slower) 1703 . -mat_block_size - size of the blocks to use 1704 1705 Level: intermediate 1706 1707 Notes: 1708 The number of rows and columns must be divisible by blocksize. 1709 This matrix type does not support complex Hermitian operation. 1710 1711 Specify the preallocated storage with either nz or nnz (not both). 1712 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1713 allocation. For additional details, see the users manual chapter on 1714 matrices. 1715 1716 If the nnz parameter is given then the nz parameter is ignored 1717 1718 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1719 @*/ 1720 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1721 { 1722 PetscErrorCode ierr; 1723 1724 PetscFunctionBegin; 1725 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1726 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 1727 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1728 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 1729 PetscFunctionReturn(0); 1730 } 1731 1732 #undef __FUNCT__ 1733 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1734 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1735 { 1736 Mat C; 1737 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1738 PetscErrorCode ierr; 1739 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1740 1741 PetscFunctionBegin; 1742 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1743 1744 *B = 0; 1745 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 1746 ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr); 1747 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1748 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1749 c = (Mat_SeqSBAIJ*)C->data; 1750 1751 C->preallocated = PETSC_TRUE; 1752 C->factor = A->factor; 1753 c->row = 0; 1754 c->icol = 0; 1755 c->saved_values = 0; 1756 c->keepzeroedrows = a->keepzeroedrows; 1757 C->assembled = PETSC_TRUE; 1758 1759 ierr = PetscMapCopy(A->comm,&A->rmap,&C->rmap);CHKERRQ(ierr); 1760 ierr = PetscMapCopy(A->comm,&A->cmap,&C->cmap);CHKERRQ(ierr); 1761 c->bs2 = a->bs2; 1762 c->mbs = a->mbs; 1763 c->nbs = a->nbs; 1764 1765 ierr = PetscMalloc2((mbs+1),PetscInt,&c->imax,(mbs+1),PetscInt,&c->ilen);CHKERRQ(ierr); 1766 for (i=0; i<mbs; i++) { 1767 c->imax[i] = a->imax[i]; 1768 c->ilen[i] = a->ilen[i]; 1769 } 1770 1771 /* allocate the matrix space */ 1772 ierr = PetscMalloc3(bs2*nz,MatScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 1773 c->singlemalloc = PETSC_TRUE; 1774 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1775 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)));CHKERRQ(ierr); 1776 if (mbs > 0) { 1777 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1778 if (cpvalues == MAT_COPY_VALUES) { 1779 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1780 } else { 1781 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1782 } 1783 } 1784 1785 c->sorted = a->sorted; 1786 c->roworiented = a->roworiented; 1787 c->nonew = a->nonew; 1788 1789 if (a->diag) { 1790 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 1791 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1792 for (i=0; i<mbs; i++) { 1793 c->diag[i] = a->diag[i]; 1794 } 1795 } else c->diag = 0; 1796 c->nz = a->nz; 1797 c->maxnz = a->maxnz; 1798 c->solve_work = 0; 1799 c->mult_work = 0; 1800 c->free_a = PETSC_TRUE; 1801 c->free_ij = PETSC_TRUE; 1802 *B = C; 1803 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1804 PetscFunctionReturn(0); 1805 } 1806 1807 #undef __FUNCT__ 1808 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1809 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer, MatType type,Mat *A) 1810 { 1811 Mat_SeqSBAIJ *a; 1812 Mat B; 1813 PetscErrorCode ierr; 1814 int fd; 1815 PetscMPIInt size; 1816 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 1817 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1818 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 1819 PetscInt *masked,nmask,tmp,bs2,ishift; 1820 PetscScalar *aa; 1821 MPI_Comm comm = ((PetscObject)viewer)->comm; 1822 1823 PetscFunctionBegin; 1824 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1825 bs2 = bs*bs; 1826 1827 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1828 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1829 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1830 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1831 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1832 M = header[1]; N = header[2]; nz = header[3]; 1833 1834 if (header[3] < 0) { 1835 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1836 } 1837 1838 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1839 1840 /* 1841 This code adds extra rows to make sure the number of rows is 1842 divisible by the blocksize 1843 */ 1844 mbs = M/bs; 1845 extra_rows = bs - M + bs*(mbs); 1846 if (extra_rows == bs) extra_rows = 0; 1847 else mbs++; 1848 if (extra_rows) { 1849 ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 1850 } 1851 1852 /* read in row lengths */ 1853 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1854 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1855 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1856 1857 /* read in column indices */ 1858 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1859 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1860 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1861 1862 /* loop over row lengths determining block row lengths */ 1863 ierr = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr); 1864 ierr = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1865 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 1866 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1867 masked = mask + mbs; 1868 rowcount = 0; nzcount = 0; 1869 for (i=0; i<mbs; i++) { 1870 nmask = 0; 1871 for (j=0; j<bs; j++) { 1872 kmax = rowlengths[rowcount]; 1873 for (k=0; k<kmax; k++) { 1874 tmp = jj[nzcount++]/bs; /* block col. index */ 1875 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1876 } 1877 rowcount++; 1878 } 1879 s_browlengths[i] += nmask; 1880 1881 /* zero out the mask elements we set */ 1882 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1883 } 1884 1885 /* create our matrix */ 1886 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 1887 ierr = MatSetSizes(B,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 1888 ierr = MatSetType(B,type);CHKERRQ(ierr); 1889 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,s_browlengths);CHKERRQ(ierr); 1890 a = (Mat_SeqSBAIJ*)B->data; 1891 1892 /* set matrix "i" values */ 1893 a->i[0] = 0; 1894 for (i=1; i<= mbs; i++) { 1895 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1896 a->ilen[i-1] = s_browlengths[i-1]; 1897 } 1898 a->nz = a->i[mbs]; 1899 1900 /* read in nonzero values */ 1901 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1902 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1903 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1904 1905 /* set "a" and "j" values into matrix */ 1906 nzcount = 0; jcount = 0; 1907 for (i=0; i<mbs; i++) { 1908 nzcountb = nzcount; 1909 nmask = 0; 1910 for (j=0; j<bs; j++) { 1911 kmax = rowlengths[i*bs+j]; 1912 for (k=0; k<kmax; k++) { 1913 tmp = jj[nzcount++]/bs; /* block col. index */ 1914 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1915 } 1916 } 1917 /* sort the masked values */ 1918 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1919 1920 /* set "j" values into matrix */ 1921 maskcount = 1; 1922 for (j=0; j<nmask; j++) { 1923 a->j[jcount++] = masked[j]; 1924 mask[masked[j]] = maskcount++; 1925 } 1926 1927 /* set "a" values into matrix */ 1928 ishift = bs2*a->i[i]; 1929 for (j=0; j<bs; j++) { 1930 kmax = rowlengths[i*bs+j]; 1931 for (k=0; k<kmax; k++) { 1932 tmp = jj[nzcountb]/bs ; /* block col. index */ 1933 if (tmp >= i){ 1934 block = mask[tmp] - 1; 1935 point = jj[nzcountb] - bs*tmp; 1936 idx = ishift + bs2*block + j + bs*point; 1937 a->a[idx] = aa[nzcountb]; 1938 } 1939 nzcountb++; 1940 } 1941 } 1942 /* zero out the mask elements we set */ 1943 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1944 } 1945 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1946 1947 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1948 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1949 ierr = PetscFree(aa);CHKERRQ(ierr); 1950 ierr = PetscFree(jj);CHKERRQ(ierr); 1951 ierr = PetscFree(mask);CHKERRQ(ierr); 1952 1953 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1954 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1955 ierr = MatView_Private(B);CHKERRQ(ierr); 1956 *A = B; 1957 PetscFunctionReturn(0); 1958 } 1959 1960 #undef __FUNCT__ 1961 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1962 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1963 { 1964 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1965 MatScalar *aa=a->a,*v,*v1; 1966 PetscScalar *x,*b,*t,sum,d; 1967 PetscErrorCode ierr; 1968 PetscInt m=a->mbs,bs=A->rmap.bs,*ai=a->i,*aj=a->j; 1969 PetscInt nz,nz1,*vj,*vj1,i; 1970 1971 PetscFunctionBegin; 1972 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 1973 1974 its = its*lits; 1975 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1976 1977 if (bs > 1) 1978 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1979 1980 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1981 if (xx != bb) { 1982 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1983 } else { 1984 b = x; 1985 } 1986 1987 if (!a->relax_work) { 1988 ierr = PetscMalloc(m*sizeof(PetscScalar),&a->relax_work);CHKERRQ(ierr); 1989 } 1990 t = a->relax_work; 1991 1992 if (flag & SOR_ZERO_INITIAL_GUESS) { 1993 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1994 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 1995 1996 for (i=0; i<m; i++){ 1997 d = *(aa + ai[i]); /* diag[i] */ 1998 v = aa + ai[i] + 1; 1999 vj = aj + ai[i] + 1; 2000 nz = ai[i+1] - ai[i] - 1; 2001 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2002 x[i] = omega*t[i]/d; 2003 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 2004 } 2005 } 2006 2007 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2008 if (!(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)){ 2009 t = b; 2010 } 2011 2012 for (i=m-1; i>=0; i--){ 2013 d = *(aa + ai[i]); 2014 v = aa + ai[i] + 1; 2015 vj = aj + ai[i] + 1; 2016 nz = ai[i+1] - ai[i] - 1; 2017 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2018 sum = t[i]; 2019 while (nz--) sum -= x[*vj++]*(*v++); 2020 x[i] = (1-omega)*x[i] + omega*sum/d; 2021 } 2022 t = a->relax_work; 2023 } 2024 its--; 2025 } 2026 2027 while (its--) { 2028 /* 2029 forward sweep: 2030 for i=0,...,m-1: 2031 sum[i] = (b[i] - U(i,:)x )/d[i]; 2032 x[i] = (1-omega)x[i] + omega*sum[i]; 2033 b = b - x[i]*U^T(i,:); 2034 2035 */ 2036 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2037 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 2038 2039 for (i=0; i<m; i++){ 2040 d = *(aa + ai[i]); /* diag[i] */ 2041 v = aa + ai[i] + 1; v1=v; 2042 vj = aj + ai[i] + 1; vj1=vj; 2043 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2044 sum = t[i]; 2045 ierr = PetscLogFlops(4*nz-2);CHKERRQ(ierr); 2046 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2047 x[i] = (1-omega)*x[i] + omega*sum/d; 2048 while (nz--) t[*vj++] -= x[i]*(*v++); 2049 } 2050 } 2051 2052 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2053 /* 2054 backward sweep: 2055 b = b - x[i]*U^T(i,:), i=0,...,n-2 2056 for i=m-1,...,0: 2057 sum[i] = (b[i] - U(i,:)x )/d[i]; 2058 x[i] = (1-omega)x[i] + omega*sum[i]; 2059 */ 2060 /* if there was a forward sweep done above then I thing the next two for loops are not needed */ 2061 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 2062 2063 for (i=0; i<m-1; i++){ /* update rhs */ 2064 v = aa + ai[i] + 1; 2065 vj = aj + ai[i] + 1; 2066 nz = ai[i+1] - ai[i] - 1; 2067 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2068 while (nz--) t[*vj++] -= x[i]*(*v++); 2069 } 2070 for (i=m-1; i>=0; i--){ 2071 d = *(aa + ai[i]); 2072 v = aa + ai[i] + 1; 2073 vj = aj + ai[i] + 1; 2074 nz = ai[i+1] - ai[i] - 1; 2075 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2076 sum = t[i]; 2077 while (nz--) sum -= x[*vj++]*(*v++); 2078 x[i] = (1-omega)*x[i] + omega*sum/d; 2079 } 2080 } 2081 } 2082 2083 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2084 if (bb != xx) { 2085 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2086 } 2087 PetscFunctionReturn(0); 2088 } 2089 2090 #undef __FUNCT__ 2091 #define __FUNCT__ "MatCreateSeqSBAIJWithArrays" 2092 /*@ 2093 MatCreateSeqSBAIJWithArrays - Creates an sequential SBAIJ matrix using matrix elements 2094 (upper triangular entries in CSR format) provided by the user. 2095 2096 Collective on MPI_Comm 2097 2098 Input Parameters: 2099 + comm - must be an MPI communicator of size 1 2100 . bs - size of block 2101 . m - number of rows 2102 . n - number of columns 2103 . i - row indices 2104 . j - column indices 2105 - a - matrix values 2106 2107 Output Parameter: 2108 . mat - the matrix 2109 2110 Level: intermediate 2111 2112 Notes: 2113 The i, j, and a arrays are not copied by this routine, the user must free these arrays 2114 once the matrix is destroyed 2115 2116 You cannot set new nonzero locations into this matrix, that will generate an error. 2117 2118 The i and j indices are 0 based 2119 2120 .seealso: MatCreate(), MatCreateMPISBAIJ(), MatCreateSeqSBAIJ() 2121 2122 @*/ 2123 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 2124 { 2125 PetscErrorCode ierr; 2126 PetscInt ii; 2127 Mat_SeqSBAIJ *sbaij; 2128 2129 PetscFunctionBegin; 2130 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 2131 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2132 2133 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2134 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 2135 ierr = MatSetType(*mat,MATSEQSBAIJ);CHKERRQ(ierr); 2136 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 2137 sbaij = (Mat_SeqSBAIJ*)(*mat)->data; 2138 ierr = PetscMalloc2(m,PetscInt,&sbaij->imax,m,PetscInt,&sbaij->ilen);CHKERRQ(ierr); 2139 2140 sbaij->i = i; 2141 sbaij->j = j; 2142 sbaij->a = a; 2143 sbaij->singlemalloc = PETSC_FALSE; 2144 sbaij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 2145 sbaij->free_a = PETSC_FALSE; 2146 sbaij->free_ij = PETSC_FALSE; 2147 2148 for (ii=0; ii<m; ii++) { 2149 sbaij->ilen[ii] = sbaij->imax[ii] = i[ii+1] - i[ii]; 2150 #if defined(PETSC_USE_DEBUG) 2151 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 2152 #endif 2153 } 2154 #if defined(PETSC_USE_DEBUG) 2155 for (ii=0; ii<sbaij->i[m]; ii++) { 2156 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 2157 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 2158 } 2159 #endif 2160 2161 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2162 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2163 PetscFunctionReturn(0); 2164 } 2165 2166 2167 2168 2169 2170