1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/inline/spops.h" 9 #include "src/mat/impls/sbaij/seq/sbaij.h" 10 11 #define CHUNKSIZE 10 12 13 /* 14 Checks for missing diagonals 15 */ 16 #undef __FUNCT__ 17 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 18 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A,PetscTruth *missing,PetscInt *dd) 19 { 20 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 21 PetscErrorCode ierr; 22 PetscInt *diag,*jj = a->j,i; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 *missing = PETSC_FALSE; 28 for (i=0; i<a->mbs; i++) { 29 if (jj[diag[i]] != i) { 30 *missing = PETSC_TRUE; 31 if (dd) *dd = i; 32 break; 33 } 34 } 35 PetscFunctionReturn(0); 36 } 37 38 #undef __FUNCT__ 39 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 40 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A) 41 { 42 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 43 PetscErrorCode ierr; 44 PetscInt i; 45 46 PetscFunctionBegin; 47 if (!a->diag) { 48 ierr = PetscMalloc(a->mbs*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 49 } 50 for (i=0; i<a->mbs; i++) a->diag[i] = a->i[i]; 51 PetscFunctionReturn(0); 52 } 53 54 #undef __FUNCT__ 55 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 56 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 57 { 58 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 59 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs; 60 PetscErrorCode ierr; 61 62 PetscFunctionBegin; 63 *nn = n; 64 if (!ia) PetscFunctionReturn(0); 65 if (!blockcompressed) { 66 /* malloc & create the natural set of indices */ 67 ierr = PetscMalloc2((n+1)*bs,PetscInt,ia,nz*bs,PetscInt,ja);CHKERRQ(ierr); 68 for (i=0; i<n+1; i++) { 69 for (j=0; j<bs; j++) { 70 *ia[i*bs+j] = a->i[i]*bs+j+oshift; 71 } 72 } 73 for (i=0; i<nz; i++) { 74 for (j=0; j<bs; j++) { 75 *ja[i*bs+j] = a->j[i]*bs+j+oshift; 76 } 77 } 78 } else { /* blockcompressed */ 79 if (oshift == 1) { 80 /* temporarily add 1 to i and j indices */ 81 for (i=0; i<nz; i++) a->j[i]++; 82 for (i=0; i<n+1; i++) a->i[i]++; 83 } 84 *ia = a->i; *ja = a->j; 85 } 86 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 92 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 93 { 94 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 95 PetscInt i,n = a->mbs,nz = a->i[n]; 96 PetscErrorCode ierr; 97 98 PetscFunctionBegin; 99 if (!ia) PetscFunctionReturn(0); 100 101 if (!blockcompressed) { 102 ierr = PetscFree2(*ia,*ja);CHKERRQ(ierr); 103 } else if (oshift == 1) { /* blockcompressed */ 104 for (i=0; i<nz; i++) a->j[i]--; 105 for (i=0; i<n+1; i++) a->i[i]--; 106 } 107 108 PetscFunctionReturn(0); 109 } 110 111 #undef __FUNCT__ 112 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 113 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A) 114 { 115 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 116 PetscErrorCode ierr; 117 118 PetscFunctionBegin; 119 #if defined(PETSC_USE_LOG) 120 PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->rmap.N,a->nz); 121 #endif 122 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 123 if (a->row) {ierr = ISDestroy(a->row);CHKERRQ(ierr);} 124 if (a->col){ierr = ISDestroy(a->col);CHKERRQ(ierr);} 125 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 126 ierr = PetscFree(a->diag);CHKERRQ(ierr); 127 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 128 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 129 ierr = PetscFree(a->relax_work);CHKERRQ(ierr); 130 ierr = PetscFree(a->solves_work);CHKERRQ(ierr); 131 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 132 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 133 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 134 135 ierr = PetscFree(a->inew);CHKERRQ(ierr); 136 ierr = PetscFree(a);CHKERRQ(ierr); 137 138 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 139 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 140 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 141 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 142 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 143 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr); 144 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 145 PetscFunctionReturn(0); 146 } 147 148 #undef __FUNCT__ 149 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 150 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op,PetscTruth flg) 151 { 152 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 153 PetscErrorCode ierr; 154 155 PetscFunctionBegin; 156 switch (op) { 157 case MAT_ROW_ORIENTED: 158 a->roworiented = flg; 159 break; 160 case MAT_KEEP_ZEROED_ROWS: 161 a->keepzeroedrows = flg; 162 break; 163 case MAT_NEW_NONZERO_LOCATIONS: 164 a->nonew = (flg ? 0 : 1); 165 break; 166 case MAT_NEW_NONZERO_LOCATION_ERR: 167 a->nonew = (flg ? -1 : 0); 168 break; 169 case MAT_NEW_NONZERO_ALLOCATION_ERR: 170 a->nonew = (flg ? -2 : 0); 171 break; 172 case MAT_NEW_DIAGONALS: 173 case MAT_IGNORE_OFF_PROC_ENTRIES: 174 case MAT_USE_HASH_TABLE: 175 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 176 break; 177 case MAT_HERMITIAN: 178 if (flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 179 case MAT_SYMMETRIC: 180 case MAT_STRUCTURALLY_SYMMETRIC: 181 case MAT_SYMMETRY_ETERNAL: 182 if (!flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 183 ierr = PetscInfo1(A,"Option %s not relevent\n",MatOptions[op]);CHKERRQ(ierr); 184 break; 185 case MAT_IGNORE_LOWER_TRIANGULAR: 186 a->ignore_ltriangular = flg; 187 break; 188 case MAT_ERROR_LOWER_TRIANGULAR: 189 a->ignore_ltriangular = flg; 190 break; 191 case MAT_GETROW_UPPERTRIANGULAR: 192 a->getrow_utriangular = flg; 193 break; 194 default: 195 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 196 } 197 PetscFunctionReturn(0); 198 } 199 200 #undef __FUNCT__ 201 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 202 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v) 203 { 204 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 205 PetscErrorCode ierr; 206 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 207 MatScalar *aa,*aa_i; 208 PetscScalar *v_i; 209 210 PetscFunctionBegin; 211 if (A && !a->getrow_utriangular) SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format. Getting the upper triangular part of row, run with -mat_getrow_uppertriangular, call MatSetOption(mat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE) or MatGetRowUpperTriangular()"); 212 /* Get the upper triangular part of the row */ 213 bs = A->rmap.bs; 214 ai = a->i; 215 aj = a->j; 216 aa = a->a; 217 bs2 = a->bs2; 218 219 if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row); 220 221 bn = row/bs; /* Block number */ 222 bp = row % bs; /* Block position */ 223 M = ai[bn+1] - ai[bn]; 224 *ncols = bs*M; 225 226 if (v) { 227 *v = 0; 228 if (*ncols) { 229 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 230 for (i=0; i<M; i++) { /* for each block in the block row */ 231 v_i = *v + i*bs; 232 aa_i = aa + bs2*(ai[bn] + i); 233 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 234 } 235 } 236 } 237 238 if (cols) { 239 *cols = 0; 240 if (*ncols) { 241 ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr); 242 for (i=0; i<M; i++) { /* for each block in the block row */ 243 cols_i = *cols + i*bs; 244 itmp = bs*aj[ai[bn] + i]; 245 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 246 } 247 } 248 } 249 250 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 251 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 252 #ifdef column_search 253 v_i = *v + M*bs; 254 cols_i = *cols + M*bs; 255 for (i=0; i<bn; i++){ /* for each block row */ 256 M = ai[i+1] - ai[i]; 257 for (j=0; j<M; j++){ 258 itmp = aj[ai[i] + j]; /* block column value */ 259 if (itmp == bn){ 260 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 261 for (k=0; k<bs; k++) { 262 *cols_i++ = i*bs+k; 263 *v_i++ = aa_i[k]; 264 } 265 *ncols += bs; 266 break; 267 } 268 } 269 } 270 #endif 271 PetscFunctionReturn(0); 272 } 273 274 #undef __FUNCT__ 275 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 276 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 277 { 278 PetscErrorCode ierr; 279 280 PetscFunctionBegin; 281 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 282 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 283 PetscFunctionReturn(0); 284 } 285 286 #undef __FUNCT__ 287 #define __FUNCT__ "MatGetRowUpperTriangular_SeqSBAIJ" 288 PetscErrorCode MatGetRowUpperTriangular_SeqSBAIJ(Mat A) 289 { 290 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 291 292 PetscFunctionBegin; 293 a->getrow_utriangular = PETSC_TRUE; 294 PetscFunctionReturn(0); 295 } 296 #undef __FUNCT__ 297 #define __FUNCT__ "MatRestoreRowUpperTriangular_SeqSBAIJ" 298 PetscErrorCode MatRestoreRowUpperTriangular_SeqSBAIJ(Mat A) 299 { 300 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 301 302 PetscFunctionBegin; 303 a->getrow_utriangular = PETSC_FALSE; 304 PetscFunctionReturn(0); 305 } 306 307 #undef __FUNCT__ 308 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 309 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,MatReuse reuse,Mat *B) 310 { 311 PetscErrorCode ierr; 312 PetscFunctionBegin; 313 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 314 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 315 } 316 PetscFunctionReturn(0); 317 } 318 319 #undef __FUNCT__ 320 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 321 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 322 { 323 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 324 PetscErrorCode ierr; 325 PetscInt i,j,bs = A->rmap.bs,k,l,bs2=a->bs2; 326 const char *name; 327 PetscViewerFormat format; 328 329 PetscFunctionBegin; 330 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 331 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 332 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 333 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 334 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 335 Mat aij; 336 337 if (A->factor && bs>1){ 338 ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: matrix is factored with bs>1. MatView() with PETSC_VIEWER_ASCII_MATLAB is not supported and ignored!\n");CHKERRQ(ierr); 339 PetscFunctionReturn(0); 340 } 341 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 342 ierr = MatView(aij,viewer);CHKERRQ(ierr); 343 ierr = MatDestroy(aij);CHKERRQ(ierr); 344 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 345 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 346 for (i=0; i<a->mbs; i++) { 347 for (j=0; j<bs; j++) { 348 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 349 for (k=a->i[i]; k<a->i[i+1]; k++) { 350 for (l=0; l<bs; l++) { 351 #if defined(PETSC_USE_COMPLEX) 352 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 353 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 354 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 355 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 356 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 357 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 358 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 359 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 360 } 361 #else 362 if (a->a[bs2*k + l*bs + j] != 0.0) { 363 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 364 } 365 #endif 366 } 367 } 368 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 369 } 370 } 371 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 372 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 373 PetscFunctionReturn(0); 374 } else { 375 if (A->factor && bs>1){ 376 ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: matrix is factored. MatView_SeqSBAIJ_ASCII() may not display complete or logically correct entries!\n");CHKERRQ(ierr); 377 } 378 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 379 for (i=0; i<a->mbs; i++) { 380 for (j=0; j<bs; j++) { 381 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 382 for (k=a->i[i]; k<a->i[i+1]; k++) { 383 for (l=0; l<bs; l++) { 384 #if defined(PETSC_USE_COMPLEX) 385 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 386 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 387 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 388 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 389 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 390 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 391 } else { 392 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 393 } 394 #else 395 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 396 #endif 397 } 398 } 399 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 400 } 401 } 402 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 403 } 404 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 405 PetscFunctionReturn(0); 406 } 407 408 #undef __FUNCT__ 409 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 410 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 411 { 412 Mat A = (Mat) Aa; 413 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 414 PetscErrorCode ierr; 415 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2; 416 PetscMPIInt rank; 417 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 418 MatScalar *aa; 419 MPI_Comm comm; 420 PetscViewer viewer; 421 422 PetscFunctionBegin; 423 /* 424 This is nasty. If this is called from an originally parallel matrix 425 then all processes call this,but only the first has the matrix so the 426 rest should return immediately. 427 */ 428 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 429 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 430 if (rank) PetscFunctionReturn(0); 431 432 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 433 434 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 435 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 436 437 /* loop over matrix elements drawing boxes */ 438 color = PETSC_DRAW_BLUE; 439 for (i=0,row=0; i<mbs; i++,row+=bs) { 440 for (j=a->i[i]; j<a->i[i+1]; j++) { 441 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 442 x_l = a->j[j]*bs; x_r = x_l + 1.0; 443 aa = a->a + j*bs2; 444 for (k=0; k<bs; k++) { 445 for (l=0; l<bs; l++) { 446 if (PetscRealPart(*aa++) >= 0.) continue; 447 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 448 } 449 } 450 } 451 } 452 color = PETSC_DRAW_CYAN; 453 for (i=0,row=0; i<mbs; i++,row+=bs) { 454 for (j=a->i[i]; j<a->i[i+1]; j++) { 455 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 456 x_l = a->j[j]*bs; x_r = x_l + 1.0; 457 aa = a->a + j*bs2; 458 for (k=0; k<bs; k++) { 459 for (l=0; l<bs; l++) { 460 if (PetscRealPart(*aa++) != 0.) continue; 461 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 462 } 463 } 464 } 465 } 466 467 color = PETSC_DRAW_RED; 468 for (i=0,row=0; i<mbs; i++,row+=bs) { 469 for (j=a->i[i]; j<a->i[i+1]; j++) { 470 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 471 x_l = a->j[j]*bs; x_r = x_l + 1.0; 472 aa = a->a + j*bs2; 473 for (k=0; k<bs; k++) { 474 for (l=0; l<bs; l++) { 475 if (PetscRealPart(*aa++) <= 0.) continue; 476 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 477 } 478 } 479 } 480 } 481 PetscFunctionReturn(0); 482 } 483 484 #undef __FUNCT__ 485 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 486 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 487 { 488 PetscErrorCode ierr; 489 PetscReal xl,yl,xr,yr,w,h; 490 PetscDraw draw; 491 PetscTruth isnull; 492 493 PetscFunctionBegin; 494 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 495 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 496 497 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 498 xr = A->rmap.N; yr = A->rmap.N; h = yr/10.0; w = xr/10.0; 499 xr += w; yr += h; xl = -w; yl = -h; 500 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 501 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 502 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 503 PetscFunctionReturn(0); 504 } 505 506 #undef __FUNCT__ 507 #define __FUNCT__ "MatView_SeqSBAIJ" 508 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 509 { 510 PetscErrorCode ierr; 511 PetscTruth iascii,isdraw; 512 513 PetscFunctionBegin; 514 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 515 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 516 if (iascii){ 517 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 518 } else if (isdraw) { 519 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 520 } else { 521 Mat B; 522 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 523 ierr = MatView(B,viewer);CHKERRQ(ierr); 524 ierr = MatDestroy(B);CHKERRQ(ierr); 525 } 526 PetscFunctionReturn(0); 527 } 528 529 530 #undef __FUNCT__ 531 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 532 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 533 { 534 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 535 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 536 PetscInt *ai = a->i,*ailen = a->ilen; 537 PetscInt brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2; 538 MatScalar *ap,*aa = a->a; 539 540 PetscFunctionBegin; 541 for (k=0; k<m; k++) { /* loop over rows */ 542 row = im[k]; brow = row/bs; 543 if (row < 0) {v += n; continue;} /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); */ 544 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 545 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 546 nrow = ailen[brow]; 547 for (l=0; l<n; l++) { /* loop over columns */ 548 if (in[l] < 0) {v++; continue;} /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); */ 549 if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1); 550 col = in[l] ; 551 bcol = col/bs; 552 cidx = col%bs; 553 ridx = row%bs; 554 high = nrow; 555 low = 0; /* assume unsorted */ 556 while (high-low > 5) { 557 t = (low+high)/2; 558 if (rp[t] > bcol) high = t; 559 else low = t; 560 } 561 for (i=low; i<high; i++) { 562 if (rp[i] > bcol) break; 563 if (rp[i] == bcol) { 564 *v++ = ap[bs2*i+bs*cidx+ridx]; 565 goto finished; 566 } 567 } 568 *v++ = 0.0; 569 finished:; 570 } 571 } 572 PetscFunctionReturn(0); 573 } 574 575 576 #undef __FUNCT__ 577 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 578 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 579 { 580 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 581 PetscErrorCode ierr; 582 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 583 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 584 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval; 585 PetscTruth roworiented=a->roworiented; 586 const PetscScalar *value = v; 587 MatScalar *ap,*aa = a->a,*bap; 588 589 PetscFunctionBegin; 590 if (roworiented) { 591 stepval = (n-1)*bs; 592 } else { 593 stepval = (m-1)*bs; 594 } 595 for (k=0; k<m; k++) { /* loop over added rows */ 596 row = im[k]; 597 if (row < 0) continue; 598 #if defined(PETSC_USE_DEBUG) 599 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 600 #endif 601 rp = aj + ai[row]; 602 ap = aa + bs2*ai[row]; 603 rmax = imax[row]; 604 nrow = ailen[row]; 605 low = 0; 606 high = nrow; 607 for (l=0; l<n; l++) { /* loop over added columns */ 608 if (in[l] < 0) continue; 609 col = in[l]; 610 #if defined(PETSC_USE_DEBUG) 611 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1); 612 #endif 613 if (col < row) continue; /* ignore lower triangular block */ 614 if (roworiented) { 615 value = v + k*(stepval+bs)*bs + l*bs; 616 } else { 617 value = v + l*(stepval+bs)*bs + k*bs; 618 } 619 if (col <= lastcol) low = 0; else high = nrow; 620 lastcol = col; 621 while (high-low > 7) { 622 t = (low+high)/2; 623 if (rp[t] > col) high = t; 624 else low = t; 625 } 626 for (i=low; i<high; i++) { 627 if (rp[i] > col) break; 628 if (rp[i] == col) { 629 bap = ap + bs2*i; 630 if (roworiented) { 631 if (is == ADD_VALUES) { 632 for (ii=0; ii<bs; ii++,value+=stepval) { 633 for (jj=ii; jj<bs2; jj+=bs) { 634 bap[jj] += *value++; 635 } 636 } 637 } else { 638 for (ii=0; ii<bs; ii++,value+=stepval) { 639 for (jj=ii; jj<bs2; jj+=bs) { 640 bap[jj] = *value++; 641 } 642 } 643 } 644 } else { 645 if (is == ADD_VALUES) { 646 for (ii=0; ii<bs; ii++,value+=stepval) { 647 for (jj=0; jj<bs; jj++) { 648 *bap++ += *value++; 649 } 650 } 651 } else { 652 for (ii=0; ii<bs; ii++,value+=stepval) { 653 for (jj=0; jj<bs; jj++) { 654 *bap++ = *value++; 655 } 656 } 657 } 658 } 659 goto noinsert2; 660 } 661 } 662 if (nonew == 1) goto noinsert2; 663 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 664 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 665 N = nrow++ - 1; high++; 666 /* shift up all the later entries in this row */ 667 for (ii=N; ii>=i; ii--) { 668 rp[ii+1] = rp[ii]; 669 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 670 } 671 if (N >= i) { 672 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 673 } 674 rp[i] = col; 675 bap = ap + bs2*i; 676 if (roworiented) { 677 for (ii=0; ii<bs; ii++,value+=stepval) { 678 for (jj=ii; jj<bs2; jj+=bs) { 679 bap[jj] = *value++; 680 } 681 } 682 } else { 683 for (ii=0; ii<bs; ii++,value+=stepval) { 684 for (jj=0; jj<bs; jj++) { 685 *bap++ = *value++; 686 } 687 } 688 } 689 noinsert2:; 690 low = i; 691 } 692 ailen[row] = nrow; 693 } 694 PetscFunctionReturn(0); 695 } 696 697 #undef __FUNCT__ 698 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 699 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 700 { 701 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 702 PetscErrorCode ierr; 703 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 704 PetscInt m = A->rmap.N,*ip,N,*ailen = a->ilen; 705 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 706 MatScalar *aa = a->a,*ap; 707 708 PetscFunctionBegin; 709 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 710 711 if (m) rmax = ailen[0]; 712 for (i=1; i<mbs; i++) { 713 /* move each row back by the amount of empty slots (fshift) before it*/ 714 fshift += imax[i-1] - ailen[i-1]; 715 rmax = PetscMax(rmax,ailen[i]); 716 if (fshift) { 717 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 718 N = ailen[i]; 719 for (j=0; j<N; j++) { 720 ip[j-fshift] = ip[j]; 721 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 722 } 723 } 724 ai[i] = ai[i-1] + ailen[i-1]; 725 } 726 if (mbs) { 727 fshift += imax[mbs-1] - ailen[mbs-1]; 728 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 729 } 730 /* reset ilen and imax for each row */ 731 for (i=0; i<mbs; i++) { 732 ailen[i] = imax[i] = ai[i+1] - ai[i]; 733 } 734 a->nz = ai[mbs]; 735 736 /* diagonals may have moved, reset it */ 737 if (a->diag) { 738 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 739 } 740 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->rmap.N,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 741 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 742 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 743 a->reallocs = 0; 744 A->info.nz_unneeded = (PetscReal)fshift*bs2; 745 PetscFunctionReturn(0); 746 } 747 748 /* 749 This function returns an array of flags which indicate the locations of contiguous 750 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 751 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 752 Assume: sizes should be long enough to hold all the values. 753 */ 754 #undef __FUNCT__ 755 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 756 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 757 { 758 PetscInt i,j,k,row; 759 PetscTruth flg; 760 761 PetscFunctionBegin; 762 for (i=0,j=0; i<n; j++) { 763 row = idx[i]; 764 if (row%bs!=0) { /* Not the begining of a block */ 765 sizes[j] = 1; 766 i++; 767 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 768 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 769 i++; 770 } else { /* Begining of the block, so check if the complete block exists */ 771 flg = PETSC_TRUE; 772 for (k=1; k<bs; k++) { 773 if (row+k != idx[i+k]) { /* break in the block */ 774 flg = PETSC_FALSE; 775 break; 776 } 777 } 778 if (flg) { /* No break in the bs */ 779 sizes[j] = bs; 780 i+= bs; 781 } else { 782 sizes[j] = 1; 783 i++; 784 } 785 } 786 } 787 *bs_max = j; 788 PetscFunctionReturn(0); 789 } 790 791 792 /* Only add/insert a(i,j) with i<=j (blocks). 793 Any a(i,j) with i>j input by user is ingored. 794 */ 795 796 #undef __FUNCT__ 797 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 798 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 799 { 800 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 801 PetscErrorCode ierr; 802 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 803 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 804 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol; 805 PetscInt ridx,cidx,bs2=a->bs2; 806 MatScalar *ap,value,*aa=a->a,*bap; 807 808 PetscFunctionBegin; 809 for (k=0; k<m; k++) { /* loop over added rows */ 810 row = im[k]; /* row number */ 811 brow = row/bs; /* block row number */ 812 if (row < 0) continue; 813 #if defined(PETSC_USE_DEBUG) 814 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 815 #endif 816 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 817 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 818 rmax = imax[brow]; /* maximum space allocated for this row */ 819 nrow = ailen[brow]; /* actual length of this row */ 820 low = 0; 821 822 for (l=0; l<n; l++) { /* loop over added columns */ 823 if (in[l] < 0) continue; 824 #if defined(PETSC_USE_DEBUG) 825 if (in[l] >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->rmap.N-1); 826 #endif 827 col = in[l]; 828 bcol = col/bs; /* block col number */ 829 830 if (brow > bcol) { 831 if (a->ignore_ltriangular){ 832 continue; /* ignore lower triangular values */ 833 } else { 834 SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 835 } 836 } 837 838 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 839 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 840 /* element value a(k,l) */ 841 if (roworiented) { 842 value = v[l + k*n]; 843 } else { 844 value = v[k + l*m]; 845 } 846 847 /* move pointer bap to a(k,l) quickly and add/insert value */ 848 if (col <= lastcol) low = 0; high = nrow; 849 lastcol = col; 850 while (high-low > 7) { 851 t = (low+high)/2; 852 if (rp[t] > bcol) high = t; 853 else low = t; 854 } 855 for (i=low; i<high; i++) { 856 if (rp[i] > bcol) break; 857 if (rp[i] == bcol) { 858 bap = ap + bs2*i + bs*cidx + ridx; 859 if (is == ADD_VALUES) *bap += value; 860 else *bap = value; 861 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 862 if (brow == bcol && ridx < cidx){ 863 bap = ap + bs2*i + bs*ridx + cidx; 864 if (is == ADD_VALUES) *bap += value; 865 else *bap = value; 866 } 867 goto noinsert1; 868 } 869 } 870 871 if (nonew == 1) goto noinsert1; 872 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 873 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 874 875 N = nrow++ - 1; high++; 876 /* shift up all the later entries in this row */ 877 for (ii=N; ii>=i; ii--) { 878 rp[ii+1] = rp[ii]; 879 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 880 } 881 if (N>=i) { 882 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 883 } 884 rp[i] = bcol; 885 ap[bs2*i + bs*cidx + ridx] = value; 886 noinsert1:; 887 low = i; 888 } 889 } /* end of loop over added columns */ 890 ailen[brow] = nrow; 891 } /* end of loop over added rows */ 892 PetscFunctionReturn(0); 893 } 894 895 #undef __FUNCT__ 896 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 897 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 898 { 899 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 900 Mat outA; 901 PetscErrorCode ierr; 902 PetscTruth row_identity; 903 904 PetscFunctionBegin; 905 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels=0 is supported for in-place icc"); 906 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 907 if (!row_identity) SETERRQ(PETSC_ERR_SUP,"Matrix reordering is not supported"); 908 if (inA->rmap.bs != 1) SETERRQ1(PETSC_ERR_SUP,"Matrix block size %D is not supported",inA->rmap.bs); /* Need to replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR()! */ 909 910 outA = inA; 911 inA->factor = MAT_FACTOR_CHOLESKY; 912 913 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 914 /* 915 Blocksize < 8 have a special faster factorization/solver 916 for ICC(0) factorization with natural ordering 917 */ 918 switch (inA->rmap.bs){ /* Note: row_identity = PETSC_TRUE! */ 919 case 1: 920 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 921 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 922 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 923 inA->ops->solves = MatSolves_SeqSBAIJ_1; 924 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 925 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 926 ierr = PetscInfo(inA,"Using special in-place natural ordering solvetrans BS=1\n");CHKERRQ(ierr); 927 break; 928 case 2: 929 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 930 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 931 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 932 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering; 933 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering; 934 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=2\n");CHKERRQ(ierr); 935 break; 936 case 3: 937 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 938 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 939 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 940 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_3_NaturalOrdering; 941 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering; 942 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=3\n");CHKERRQ(ierr); 943 break; 944 case 4: 945 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 946 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 947 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 948 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_4_NaturalOrdering; 949 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering; 950 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=4\n");CHKERRQ(ierr); 951 break; 952 case 5: 953 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 954 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 955 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 956 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_5_NaturalOrdering; 957 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering; 958 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=5\n");CHKERRQ(ierr); 959 break; 960 case 6: 961 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 962 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 963 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 964 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_6_NaturalOrdering; 965 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering; 966 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=6\n");CHKERRQ(ierr); 967 break; 968 case 7: 969 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 970 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 971 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 972 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_7_NaturalOrdering; 973 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering; 974 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=7\n");CHKERRQ(ierr); 975 break; 976 default: 977 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 978 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering; 979 inA->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 980 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering; 981 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering; 982 break; 983 } 984 985 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 986 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 987 a->row = row; 988 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 989 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 990 a->col = row; 991 992 /* Create the invert permutation so that it can be used in MatCholeskyFactorNumeric() */ 993 if (a->icol) {ierr = ISInvertPermutation(row,PETSC_DECIDE, &a->icol);CHKERRQ(ierr);} 994 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 995 996 if (!a->solve_work) { 997 ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 998 ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr); 999 } 1000 1001 ierr = MatCholeskyFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1002 PetscFunctionReturn(0); 1003 } 1004 1005 EXTERN_C_BEGIN 1006 #undef __FUNCT__ 1007 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1008 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices) 1009 { 1010 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1011 PetscInt i,nz,n; 1012 1013 PetscFunctionBegin; 1014 nz = baij->maxnz; 1015 n = mat->cmap.n; 1016 for (i=0; i<nz; i++) { 1017 baij->j[i] = indices[i]; 1018 } 1019 baij->nz = nz; 1020 for (i=0; i<n; i++) { 1021 baij->ilen[i] = baij->imax[i]; 1022 } 1023 PetscFunctionReturn(0); 1024 } 1025 EXTERN_C_END 1026 1027 #undef __FUNCT__ 1028 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1029 /*@ 1030 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1031 in the matrix. 1032 1033 Input Parameters: 1034 + mat - the SeqSBAIJ matrix 1035 - indices - the column indices 1036 1037 Level: advanced 1038 1039 Notes: 1040 This can be called if you have precomputed the nonzero structure of the 1041 matrix and want to provide it to the matrix object to improve the performance 1042 of the MatSetValues() operation. 1043 1044 You MUST have set the correct numbers of nonzeros per row in the call to 1045 MatCreateSeqSBAIJ(), and the columns indices MUST be sorted. 1046 1047 MUST be called before any calls to MatSetValues() 1048 1049 .seealso: MatCreateSeqSBAIJ 1050 @*/ 1051 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1052 { 1053 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1054 1055 PetscFunctionBegin; 1056 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1057 PetscValidPointer(indices,2); 1058 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1059 if (f) { 1060 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1061 } else { 1062 SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices"); 1063 } 1064 PetscFunctionReturn(0); 1065 } 1066 1067 #undef __FUNCT__ 1068 #define __FUNCT__ "MatCopy_SeqSBAIJ" 1069 PetscErrorCode MatCopy_SeqSBAIJ(Mat A,Mat B,MatStructure str) 1070 { 1071 PetscErrorCode ierr; 1072 1073 PetscFunctionBegin; 1074 /* If the two matrices have the same copy implementation, use fast copy. */ 1075 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1076 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1077 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1078 1079 if (a->i[A->rmap.N] != b->i[B->rmap.N]) { 1080 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1081 } 1082 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr); 1083 } else { 1084 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1085 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1086 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1087 } 1088 PetscFunctionReturn(0); 1089 } 1090 1091 #undef __FUNCT__ 1092 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1093 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A) 1094 { 1095 PetscErrorCode ierr; 1096 1097 PetscFunctionBegin; 1098 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 1099 PetscFunctionReturn(0); 1100 } 1101 1102 #undef __FUNCT__ 1103 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1104 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1105 { 1106 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1107 PetscFunctionBegin; 1108 *array = a->a; 1109 PetscFunctionReturn(0); 1110 } 1111 1112 #undef __FUNCT__ 1113 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1114 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1115 { 1116 PetscFunctionBegin; 1117 PetscFunctionReturn(0); 1118 } 1119 1120 #include "petscblaslapack.h" 1121 #undef __FUNCT__ 1122 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1123 PetscErrorCode MatAXPY_SeqSBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1124 { 1125 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1126 PetscErrorCode ierr; 1127 PetscInt i,bs=Y->rmap.bs,bs2,j; 1128 PetscBLASInt one = 1,bnz = PetscBLASIntCast(x->nz); 1129 1130 PetscFunctionBegin; 1131 if (str == SAME_NONZERO_PATTERN) { 1132 PetscScalar alpha = a; 1133 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1134 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1135 if (y->xtoy && y->XtoY != X) { 1136 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1137 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1138 } 1139 if (!y->xtoy) { /* get xtoy */ 1140 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1141 y->XtoY = X; 1142 } 1143 bs2 = bs*bs; 1144 for (i=0; i<x->nz; i++) { 1145 j = 0; 1146 while (j < bs2){ 1147 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 1148 j++; 1149 } 1150 } 1151 ierr = PetscInfo3(Y,"ratio of nnz_s(X)/nnz_s(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 1152 } else { 1153 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1154 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1155 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1156 } 1157 PetscFunctionReturn(0); 1158 } 1159 1160 #undef __FUNCT__ 1161 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1162 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1163 { 1164 PetscFunctionBegin; 1165 *flg = PETSC_TRUE; 1166 PetscFunctionReturn(0); 1167 } 1168 1169 #undef __FUNCT__ 1170 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1171 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1172 { 1173 PetscFunctionBegin; 1174 *flg = PETSC_TRUE; 1175 PetscFunctionReturn(0); 1176 } 1177 1178 #undef __FUNCT__ 1179 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1180 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1181 { 1182 PetscFunctionBegin; 1183 *flg = PETSC_FALSE; 1184 PetscFunctionReturn(0); 1185 } 1186 1187 #undef __FUNCT__ 1188 #define __FUNCT__ "MatRealPart_SeqSBAIJ" 1189 PetscErrorCode MatRealPart_SeqSBAIJ(Mat A) 1190 { 1191 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1192 PetscInt i,nz = a->bs2*a->i[a->mbs]; 1193 MatScalar *aa = a->a; 1194 1195 PetscFunctionBegin; 1196 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 1197 PetscFunctionReturn(0); 1198 } 1199 1200 #undef __FUNCT__ 1201 #define __FUNCT__ "MatImaginaryPart_SeqSBAIJ" 1202 PetscErrorCode MatImaginaryPart_SeqSBAIJ(Mat A) 1203 { 1204 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1205 PetscInt i,nz = a->bs2*a->i[a->mbs]; 1206 MatScalar *aa = a->a; 1207 1208 PetscFunctionBegin; 1209 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 1210 PetscFunctionReturn(0); 1211 } 1212 1213 /* -------------------------------------------------------------------*/ 1214 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1215 MatGetRow_SeqSBAIJ, 1216 MatRestoreRow_SeqSBAIJ, 1217 MatMult_SeqSBAIJ_N, 1218 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1219 MatMult_SeqSBAIJ_N, /* transpose versions are same as non-transpose versions */ 1220 MatMultAdd_SeqSBAIJ_N, 1221 MatSolve_SeqSBAIJ_N, 1222 0, 1223 0, 1224 /*10*/ 0, 1225 0, 1226 MatCholeskyFactor_SeqSBAIJ, 1227 MatRelax_SeqSBAIJ, 1228 MatTranspose_SeqSBAIJ, 1229 /*15*/ MatGetInfo_SeqSBAIJ, 1230 MatEqual_SeqSBAIJ, 1231 MatGetDiagonal_SeqSBAIJ, 1232 MatDiagonalScale_SeqSBAIJ, 1233 MatNorm_SeqSBAIJ, 1234 /*20*/ 0, 1235 MatAssemblyEnd_SeqSBAIJ, 1236 0, 1237 MatSetOption_SeqSBAIJ, 1238 MatZeroEntries_SeqSBAIJ, 1239 /*25*/ 0, 1240 0, 1241 0, 1242 MatCholeskyFactorSymbolic_SeqSBAIJ, 1243 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1244 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1245 0, 1246 MatICCFactorSymbolic_SeqSBAIJ, 1247 MatGetArray_SeqSBAIJ, 1248 MatRestoreArray_SeqSBAIJ, 1249 /*35*/ MatDuplicate_SeqSBAIJ, 1250 MatForwardSolve_SeqSBAIJ_N, 1251 MatBackwardSolve_SeqSBAIJ_N, 1252 0, 1253 MatICCFactor_SeqSBAIJ, 1254 /*40*/ MatAXPY_SeqSBAIJ, 1255 MatGetSubMatrices_SeqSBAIJ, 1256 MatIncreaseOverlap_SeqSBAIJ, 1257 MatGetValues_SeqSBAIJ, 1258 MatCopy_SeqSBAIJ, 1259 /*45*/ 0, 1260 MatScale_SeqSBAIJ, 1261 0, 1262 0, 1263 0, 1264 /*50*/ 0, 1265 MatGetRowIJ_SeqSBAIJ, 1266 MatRestoreRowIJ_SeqSBAIJ, 1267 0, 1268 0, 1269 /*55*/ 0, 1270 0, 1271 0, 1272 0, 1273 MatSetValuesBlocked_SeqSBAIJ, 1274 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1275 0, 1276 0, 1277 0, 1278 0, 1279 /*65*/ 0, 1280 0, 1281 0, 1282 0, 1283 0, 1284 /*70*/ MatGetRowMaxAbs_SeqSBAIJ, 1285 0, 1286 0, 1287 0, 1288 0, 1289 /*75*/ 0, 1290 0, 1291 0, 1292 0, 1293 0, 1294 /*80*/ 0, 1295 0, 1296 0, 1297 #if !defined(PETSC_USE_COMPLEX) 1298 MatGetInertia_SeqSBAIJ, 1299 #else 1300 0, 1301 #endif 1302 MatLoad_SeqSBAIJ, 1303 /*85*/ MatIsSymmetric_SeqSBAIJ, 1304 MatIsHermitian_SeqSBAIJ, 1305 MatIsStructurallySymmetric_SeqSBAIJ, 1306 0, 1307 0, 1308 /*90*/ 0, 1309 0, 1310 0, 1311 0, 1312 0, 1313 /*95*/ 0, 1314 0, 1315 0, 1316 0, 1317 0, 1318 /*100*/0, 1319 0, 1320 0, 1321 0, 1322 0, 1323 /*105*/0, 1324 MatRealPart_SeqSBAIJ, 1325 MatImaginaryPart_SeqSBAIJ, 1326 MatGetRowUpperTriangular_SeqSBAIJ, 1327 MatRestoreRowUpperTriangular_SeqSBAIJ, 1328 /*110*/0, 1329 0, 1330 0, 1331 0, 1332 MatMissingDiagonal_SeqSBAIJ 1333 }; 1334 1335 EXTERN_C_BEGIN 1336 #undef __FUNCT__ 1337 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1338 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqSBAIJ(Mat mat) 1339 { 1340 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1341 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 1342 PetscErrorCode ierr; 1343 1344 PetscFunctionBegin; 1345 if (aij->nonew != 1) { 1346 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1347 } 1348 1349 /* allocate space for values if not already there */ 1350 if (!aij->saved_values) { 1351 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1352 } 1353 1354 /* copy values over */ 1355 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1356 PetscFunctionReturn(0); 1357 } 1358 EXTERN_C_END 1359 1360 EXTERN_C_BEGIN 1361 #undef __FUNCT__ 1362 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1363 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqSBAIJ(Mat mat) 1364 { 1365 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1366 PetscErrorCode ierr; 1367 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 1368 1369 PetscFunctionBegin; 1370 if (aij->nonew != 1) { 1371 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1372 } 1373 if (!aij->saved_values) { 1374 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1375 } 1376 1377 /* copy values over */ 1378 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1379 PetscFunctionReturn(0); 1380 } 1381 EXTERN_C_END 1382 1383 EXTERN_C_BEGIN 1384 #undef __FUNCT__ 1385 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1386 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 1387 { 1388 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1389 PetscErrorCode ierr; 1390 PetscInt i,mbs,bs2; 1391 PetscTruth skipallocation = PETSC_FALSE,flg; 1392 1393 PetscFunctionBegin; 1394 B->preallocated = PETSC_TRUE; 1395 ierr = PetscOptionsGetInt(((PetscObject)B)->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1396 B->rmap.bs = B->cmap.bs = bs; 1397 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 1398 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 1399 1400 mbs = B->rmap.N/bs; 1401 bs2 = bs*bs; 1402 1403 if (mbs*bs != B->rmap.N) { 1404 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1405 } 1406 1407 if (nz == MAT_SKIP_ALLOCATION) { 1408 skipallocation = PETSC_TRUE; 1409 nz = 0; 1410 } 1411 1412 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1413 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 1414 if (nnz) { 1415 for (i=0; i<mbs; i++) { 1416 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 1417 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs); 1418 } 1419 } 1420 1421 ierr = PetscOptionsHasName(((PetscObject)B)->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1422 if (!flg) { 1423 switch (bs) { 1424 case 1: 1425 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1426 B->ops->solve = MatSolve_SeqSBAIJ_1; 1427 B->ops->solves = MatSolves_SeqSBAIJ_1; 1428 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1429 B->ops->mult = MatMult_SeqSBAIJ_1; 1430 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1431 B->ops->multtranspose = MatMult_SeqSBAIJ_1; 1432 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_1; 1433 break; 1434 case 2: 1435 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1436 B->ops->solve = MatSolve_SeqSBAIJ_2; 1437 B->ops->solvetranspose = MatSolve_SeqSBAIJ_2; 1438 B->ops->mult = MatMult_SeqSBAIJ_2; 1439 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1440 B->ops->multtranspose = MatMult_SeqSBAIJ_2; 1441 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_2; 1442 break; 1443 case 3: 1444 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1445 B->ops->solve = MatSolve_SeqSBAIJ_3; 1446 B->ops->solvetranspose = MatSolve_SeqSBAIJ_3; 1447 B->ops->mult = MatMult_SeqSBAIJ_3; 1448 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1449 B->ops->multtranspose = MatMult_SeqSBAIJ_3; 1450 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_3; 1451 break; 1452 case 4: 1453 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1454 B->ops->solve = MatSolve_SeqSBAIJ_4; 1455 B->ops->solvetranspose = MatSolve_SeqSBAIJ_4; 1456 B->ops->mult = MatMult_SeqSBAIJ_4; 1457 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1458 B->ops->multtranspose = MatMult_SeqSBAIJ_4; 1459 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_4; 1460 break; 1461 case 5: 1462 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1463 B->ops->solve = MatSolve_SeqSBAIJ_5; 1464 B->ops->solvetranspose = MatSolve_SeqSBAIJ_5; 1465 B->ops->mult = MatMult_SeqSBAIJ_5; 1466 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1467 B->ops->multtranspose = MatMult_SeqSBAIJ_5; 1468 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_5; 1469 break; 1470 case 6: 1471 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1472 B->ops->solve = MatSolve_SeqSBAIJ_6; 1473 B->ops->solvetranspose = MatSolve_SeqSBAIJ_6; 1474 B->ops->mult = MatMult_SeqSBAIJ_6; 1475 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1476 B->ops->multtranspose = MatMult_SeqSBAIJ_6; 1477 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_6; 1478 break; 1479 case 7: 1480 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1481 B->ops->solve = MatSolve_SeqSBAIJ_7; 1482 B->ops->solvetranspose = MatSolve_SeqSBAIJ_7; 1483 B->ops->mult = MatMult_SeqSBAIJ_7; 1484 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1485 B->ops->multtranspose = MatMult_SeqSBAIJ_7; 1486 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_7; 1487 break; 1488 } 1489 } 1490 1491 b->mbs = mbs; 1492 b->nbs = mbs; 1493 if (!skipallocation) { 1494 if (!b->imax) { 1495 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 1496 ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 1497 } 1498 if (!nnz) { 1499 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1500 else if (nz <= 0) nz = 1; 1501 for (i=0; i<mbs; i++) { 1502 b->imax[i] = nz; 1503 } 1504 nz = nz*mbs; /* total nz */ 1505 } else { 1506 nz = 0; 1507 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1508 } 1509 /* b->ilen will count nonzeros in each block row so far. */ 1510 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1511 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1512 1513 /* allocate the matrix space */ 1514 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 1515 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr); 1516 ierr = PetscLogObjectMemory(B,(B->rmap.N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 1517 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1518 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1519 b->singlemalloc = PETSC_TRUE; 1520 1521 /* pointer to beginning of each row */ 1522 b->i[0] = 0; 1523 for (i=1; i<mbs+1; i++) { 1524 b->i[i] = b->i[i-1] + b->imax[i-1]; 1525 } 1526 b->free_a = PETSC_TRUE; 1527 b->free_ij = PETSC_TRUE; 1528 } else { 1529 b->free_a = PETSC_FALSE; 1530 b->free_ij = PETSC_FALSE; 1531 } 1532 1533 B->rmap.bs = bs; 1534 b->bs2 = bs2; 1535 b->nz = 0; 1536 b->maxnz = nz*bs2; 1537 1538 b->inew = 0; 1539 b->jnew = 0; 1540 b->anew = 0; 1541 b->a2anew = 0; 1542 b->permute = PETSC_FALSE; 1543 PetscFunctionReturn(0); 1544 } 1545 EXTERN_C_END 1546 1547 EXTERN_C_BEGIN 1548 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 1549 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqBAIJ(Mat, MatType,MatReuse,Mat*); 1550 EXTERN_C_END 1551 1552 1553 EXTERN_C_BEGIN 1554 #undef __FUNCT__ 1555 #define __FUNCT__ "MatGetFactor_seqsbaij_petsc" 1556 PetscErrorCode MatGetFactor_seqsbaij_petsc(Mat A,MatFactorType ftype,Mat *B) 1557 { 1558 PetscInt n = A->rmap.n; 1559 PetscErrorCode ierr; 1560 1561 PetscFunctionBegin; 1562 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 1563 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 1564 if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 1565 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 1566 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1567 } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported"); 1568 PetscFunctionReturn(0); 1569 } 1570 EXTERN_C_END 1571 1572 EXTERN_C_BEGIN 1573 extern PetscErrorCode MatGetFactor_seqsbaij_mumps(Mat,MatFactorType,Mat*); 1574 extern PetscErrorCode MatGetFactor_seqsbaij_spooles(Mat,MatFactorType,Mat*); 1575 EXTERN_C_END 1576 1577 /*MC 1578 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1579 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1580 1581 Options Database Keys: 1582 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1583 1584 Level: beginner 1585 1586 .seealso: MatCreateSeqSBAIJ 1587 M*/ 1588 1589 EXTERN_C_BEGIN 1590 #undef __FUNCT__ 1591 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1592 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqSBAIJ(Mat B) 1593 { 1594 Mat_SeqSBAIJ *b; 1595 PetscErrorCode ierr; 1596 PetscMPIInt size; 1597 PetscTruth flg; 1598 1599 PetscFunctionBegin; 1600 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 1601 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1602 1603 ierr = PetscNewLog(B,Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1604 B->data = (void*)b; 1605 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1606 B->ops->destroy = MatDestroy_SeqSBAIJ; 1607 B->ops->view = MatView_SeqSBAIJ; 1608 B->mapping = 0; 1609 b->row = 0; 1610 b->icol = 0; 1611 b->reallocs = 0; 1612 b->saved_values = 0; 1613 1614 1615 b->roworiented = PETSC_TRUE; 1616 b->nonew = 0; 1617 b->diag = 0; 1618 b->solve_work = 0; 1619 b->mult_work = 0; 1620 B->spptr = 0; 1621 b->keepzeroedrows = PETSC_FALSE; 1622 b->xtoy = 0; 1623 b->XtoY = 0; 1624 1625 b->inew = 0; 1626 b->jnew = 0; 1627 b->anew = 0; 1628 b->a2anew = 0; 1629 b->permute = PETSC_FALSE; 1630 1631 b->ignore_ltriangular = PETSC_FALSE; 1632 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_ignore_lower_triangular",&flg);CHKERRQ(ierr); 1633 if (flg) b->ignore_ltriangular = PETSC_TRUE; 1634 1635 b->getrow_utriangular = PETSC_FALSE; 1636 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_getrow_uppertriangular",&flg);CHKERRQ(ierr); 1637 if (flg) b->getrow_utriangular = PETSC_TRUE; 1638 1639 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqsbaij_spooles_C", 1640 "MatGetFactor_seqsbaij_spooles", 1641 MatGetFactor_seqsbaij_spooles);CHKERRQ(ierr); 1642 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqsbaij_mumps_C", 1643 "MatGetFactor_seqsbaij_mumps", 1644 MatGetFactor_seqsbaij_mumps);CHKERRQ(ierr); 1645 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqsbaij_petsc_C", 1646 "MatGetFactor_seqsbaij_petsc", 1647 MatGetFactor_seqsbaij_petsc);CHKERRQ(ierr); 1648 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1649 "MatStoreValues_SeqSBAIJ", 1650 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1651 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1652 "MatRetrieveValues_SeqSBAIJ", 1653 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1654 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1655 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1656 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1657 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1658 "MatConvert_SeqSBAIJ_SeqAIJ", 1659 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1660 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 1661 "MatConvert_SeqSBAIJ_SeqBAIJ", 1662 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 1663 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1664 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1665 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1666 1667 B->symmetric = PETSC_TRUE; 1668 B->structurally_symmetric = PETSC_TRUE; 1669 B->symmetric_set = PETSC_TRUE; 1670 B->structurally_symmetric_set = PETSC_TRUE; 1671 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSBAIJ);CHKERRQ(ierr); 1672 PetscFunctionReturn(0); 1673 } 1674 EXTERN_C_END 1675 1676 #undef __FUNCT__ 1677 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1678 /*@C 1679 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1680 compressed row) format. For good matrix assembly performance the 1681 user should preallocate the matrix storage by setting the parameter nz 1682 (or the array nnz). By setting these parameters accurately, performance 1683 during matrix assembly can be increased by more than a factor of 50. 1684 1685 Collective on Mat 1686 1687 Input Parameters: 1688 + A - the symmetric matrix 1689 . bs - size of block 1690 . nz - number of block nonzeros per block row (same for all rows) 1691 - nnz - array containing the number of block nonzeros in the upper triangular plus 1692 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1693 1694 Options Database Keys: 1695 . -mat_no_unroll - uses code that does not unroll the loops in the 1696 block calculations (much slower) 1697 . -mat_block_size - size of the blocks to use 1698 1699 Level: intermediate 1700 1701 Notes: 1702 Specify the preallocated storage with either nz or nnz (not both). 1703 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1704 allocation. For additional details, see the users manual chapter on 1705 matrices. 1706 1707 You can call MatGetInfo() to get information on how effective the preallocation was; 1708 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 1709 You can also run with the option -info and look for messages with the string 1710 malloc in them to see if additional memory allocation was needed. 1711 1712 If the nnz parameter is given then the nz parameter is ignored 1713 1714 1715 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1716 @*/ 1717 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 1718 { 1719 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 1720 1721 PetscFunctionBegin; 1722 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1723 if (f) { 1724 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1725 } 1726 PetscFunctionReturn(0); 1727 } 1728 1729 #undef __FUNCT__ 1730 #define __FUNCT__ "MatCreateSeqSBAIJ" 1731 /*@C 1732 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1733 compressed row) format. For good matrix assembly performance the 1734 user should preallocate the matrix storage by setting the parameter nz 1735 (or the array nnz). By setting these parameters accurately, performance 1736 during matrix assembly can be increased by more than a factor of 50. 1737 1738 Collective on MPI_Comm 1739 1740 Input Parameters: 1741 + comm - MPI communicator, set to PETSC_COMM_SELF 1742 . bs - size of block 1743 . m - number of rows, or number of columns 1744 . nz - number of block nonzeros per block row (same for all rows) 1745 - nnz - array containing the number of block nonzeros in the upper triangular plus 1746 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1747 1748 Output Parameter: 1749 . A - the symmetric matrix 1750 1751 Options Database Keys: 1752 . -mat_no_unroll - uses code that does not unroll the loops in the 1753 block calculations (much slower) 1754 . -mat_block_size - size of the blocks to use 1755 1756 Level: intermediate 1757 1758 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 1759 MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely 1760 true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles. 1761 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 1762 1763 Notes: 1764 The number of rows and columns must be divisible by blocksize. 1765 This matrix type does not support complex Hermitian operation. 1766 1767 Specify the preallocated storage with either nz or nnz (not both). 1768 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1769 allocation. For additional details, see the users manual chapter on 1770 matrices. 1771 1772 If the nnz parameter is given then the nz parameter is ignored 1773 1774 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1775 @*/ 1776 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1777 { 1778 PetscErrorCode ierr; 1779 1780 PetscFunctionBegin; 1781 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1782 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 1783 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1784 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 1785 PetscFunctionReturn(0); 1786 } 1787 1788 #undef __FUNCT__ 1789 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1790 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1791 { 1792 Mat C; 1793 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1794 PetscErrorCode ierr; 1795 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1796 1797 PetscFunctionBegin; 1798 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1799 1800 *B = 0; 1801 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1802 ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr); 1803 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1804 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1805 c = (Mat_SeqSBAIJ*)C->data; 1806 1807 C->preallocated = PETSC_TRUE; 1808 C->factor = A->factor; 1809 c->row = 0; 1810 c->icol = 0; 1811 c->saved_values = 0; 1812 c->keepzeroedrows = a->keepzeroedrows; 1813 C->assembled = PETSC_TRUE; 1814 1815 ierr = PetscMapCopy(((PetscObject)A)->comm,&A->rmap,&C->rmap);CHKERRQ(ierr); 1816 ierr = PetscMapCopy(((PetscObject)A)->comm,&A->cmap,&C->cmap);CHKERRQ(ierr); 1817 c->bs2 = a->bs2; 1818 c->mbs = a->mbs; 1819 c->nbs = a->nbs; 1820 1821 ierr = PetscMalloc2((mbs+1),PetscInt,&c->imax,(mbs+1),PetscInt,&c->ilen);CHKERRQ(ierr); 1822 for (i=0; i<mbs; i++) { 1823 c->imax[i] = a->imax[i]; 1824 c->ilen[i] = a->ilen[i]; 1825 } 1826 1827 /* allocate the matrix space */ 1828 ierr = PetscMalloc3(bs2*nz,MatScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 1829 c->singlemalloc = PETSC_TRUE; 1830 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1831 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)));CHKERRQ(ierr); 1832 if (mbs > 0) { 1833 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1834 if (cpvalues == MAT_COPY_VALUES) { 1835 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1836 } else { 1837 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1838 } 1839 } 1840 1841 c->roworiented = a->roworiented; 1842 c->nonew = a->nonew; 1843 1844 if (a->diag) { 1845 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 1846 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1847 for (i=0; i<mbs; i++) { 1848 c->diag[i] = a->diag[i]; 1849 } 1850 } else c->diag = 0; 1851 c->nz = a->nz; 1852 c->maxnz = a->maxnz; 1853 c->solve_work = 0; 1854 c->mult_work = 0; 1855 c->free_a = PETSC_TRUE; 1856 c->free_ij = PETSC_TRUE; 1857 *B = C; 1858 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 1859 PetscFunctionReturn(0); 1860 } 1861 1862 #undef __FUNCT__ 1863 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1864 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer, const MatType type,Mat *A) 1865 { 1866 Mat_SeqSBAIJ *a; 1867 Mat B; 1868 PetscErrorCode ierr; 1869 int fd; 1870 PetscMPIInt size; 1871 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 1872 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1873 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 1874 PetscInt *masked,nmask,tmp,bs2,ishift; 1875 PetscScalar *aa; 1876 MPI_Comm comm = ((PetscObject)viewer)->comm; 1877 1878 PetscFunctionBegin; 1879 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1880 bs2 = bs*bs; 1881 1882 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1883 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1884 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1885 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1886 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1887 M = header[1]; N = header[2]; nz = header[3]; 1888 1889 if (header[3] < 0) { 1890 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1891 } 1892 1893 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1894 1895 /* 1896 This code adds extra rows to make sure the number of rows is 1897 divisible by the blocksize 1898 */ 1899 mbs = M/bs; 1900 extra_rows = bs - M + bs*(mbs); 1901 if (extra_rows == bs) extra_rows = 0; 1902 else mbs++; 1903 if (extra_rows) { 1904 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 1905 } 1906 1907 /* read in row lengths */ 1908 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1909 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1910 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1911 1912 /* read in column indices */ 1913 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1914 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1915 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1916 1917 /* loop over row lengths determining block row lengths */ 1918 ierr = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr); 1919 ierr = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1920 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 1921 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1922 masked = mask + mbs; 1923 rowcount = 0; nzcount = 0; 1924 for (i=0; i<mbs; i++) { 1925 nmask = 0; 1926 for (j=0; j<bs; j++) { 1927 kmax = rowlengths[rowcount]; 1928 for (k=0; k<kmax; k++) { 1929 tmp = jj[nzcount++]/bs; /* block col. index */ 1930 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1931 } 1932 rowcount++; 1933 } 1934 s_browlengths[i] += nmask; 1935 1936 /* zero out the mask elements we set */ 1937 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1938 } 1939 1940 /* create our matrix */ 1941 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 1942 ierr = MatSetSizes(B,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 1943 ierr = MatSetType(B,type);CHKERRQ(ierr); 1944 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,s_browlengths);CHKERRQ(ierr); 1945 a = (Mat_SeqSBAIJ*)B->data; 1946 1947 /* set matrix "i" values */ 1948 a->i[0] = 0; 1949 for (i=1; i<= mbs; i++) { 1950 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1951 a->ilen[i-1] = s_browlengths[i-1]; 1952 } 1953 a->nz = a->i[mbs]; 1954 1955 /* read in nonzero values */ 1956 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1957 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1958 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1959 1960 /* set "a" and "j" values into matrix */ 1961 nzcount = 0; jcount = 0; 1962 for (i=0; i<mbs; i++) { 1963 nzcountb = nzcount; 1964 nmask = 0; 1965 for (j=0; j<bs; j++) { 1966 kmax = rowlengths[i*bs+j]; 1967 for (k=0; k<kmax; k++) { 1968 tmp = jj[nzcount++]/bs; /* block col. index */ 1969 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1970 } 1971 } 1972 /* sort the masked values */ 1973 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1974 1975 /* set "j" values into matrix */ 1976 maskcount = 1; 1977 for (j=0; j<nmask; j++) { 1978 a->j[jcount++] = masked[j]; 1979 mask[masked[j]] = maskcount++; 1980 } 1981 1982 /* set "a" values into matrix */ 1983 ishift = bs2*a->i[i]; 1984 for (j=0; j<bs; j++) { 1985 kmax = rowlengths[i*bs+j]; 1986 for (k=0; k<kmax; k++) { 1987 tmp = jj[nzcountb]/bs ; /* block col. index */ 1988 if (tmp >= i){ 1989 block = mask[tmp] - 1; 1990 point = jj[nzcountb] - bs*tmp; 1991 idx = ishift + bs2*block + j + bs*point; 1992 a->a[idx] = aa[nzcountb]; 1993 } 1994 nzcountb++; 1995 } 1996 } 1997 /* zero out the mask elements we set */ 1998 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1999 } 2000 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2001 2002 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2003 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 2004 ierr = PetscFree(aa);CHKERRQ(ierr); 2005 ierr = PetscFree(jj);CHKERRQ(ierr); 2006 ierr = PetscFree(mask);CHKERRQ(ierr); 2007 2008 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2009 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2010 ierr = MatView_Private(B);CHKERRQ(ierr); 2011 *A = B; 2012 PetscFunctionReturn(0); 2013 } 2014 2015 #undef __FUNCT__ 2016 #define __FUNCT__ "MatRelax_SeqSBAIJ" 2017 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2018 { 2019 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 2020 MatScalar *aa=a->a,*v,*v1; 2021 PetscScalar *x,*b,*t,sum,d; 2022 PetscErrorCode ierr; 2023 PetscInt m=a->mbs,bs=A->rmap.bs,*ai=a->i,*aj=a->j; 2024 PetscInt nz,nz1,*vj,*vj1,i; 2025 2026 PetscFunctionBegin; 2027 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 2028 2029 its = its*lits; 2030 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2031 2032 if (bs > 1) 2033 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2034 2035 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2036 if (xx != bb) { 2037 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2038 } else { 2039 b = x; 2040 } 2041 2042 if (!a->relax_work) { 2043 ierr = PetscMalloc(m*sizeof(PetscScalar),&a->relax_work);CHKERRQ(ierr); 2044 } 2045 t = a->relax_work; 2046 2047 if (flag & SOR_ZERO_INITIAL_GUESS) { 2048 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2049 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 2050 2051 for (i=0; i<m; i++){ 2052 d = *(aa + ai[i]); /* diag[i] */ 2053 v = aa + ai[i] + 1; 2054 vj = aj + ai[i] + 1; 2055 nz = ai[i+1] - ai[i] - 1; 2056 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2057 x[i] = omega*t[i]/d; 2058 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 2059 } 2060 } 2061 2062 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2063 if (!(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)){ 2064 t = b; 2065 } 2066 2067 for (i=m-1; i>=0; i--){ 2068 d = *(aa + ai[i]); 2069 v = aa + ai[i] + 1; 2070 vj = aj + ai[i] + 1; 2071 nz = ai[i+1] - ai[i] - 1; 2072 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2073 sum = t[i]; 2074 while (nz--) sum -= x[*vj++]*(*v++); 2075 x[i] = (1-omega)*x[i] + omega*sum/d; 2076 } 2077 t = a->relax_work; 2078 } 2079 its--; 2080 } 2081 2082 while (its--) { 2083 /* 2084 forward sweep: 2085 for i=0,...,m-1: 2086 sum[i] = (b[i] - U(i,:)x )/d[i]; 2087 x[i] = (1-omega)x[i] + omega*sum[i]; 2088 b = b - x[i]*U^T(i,:); 2089 2090 */ 2091 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2092 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 2093 2094 for (i=0; i<m; i++){ 2095 d = *(aa + ai[i]); /* diag[i] */ 2096 v = aa + ai[i] + 1; v1=v; 2097 vj = aj + ai[i] + 1; vj1=vj; 2098 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2099 sum = t[i]; 2100 ierr = PetscLogFlops(4*nz-2);CHKERRQ(ierr); 2101 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2102 x[i] = (1-omega)*x[i] + omega*sum/d; 2103 while (nz--) t[*vj++] -= x[i]*(*v++); 2104 } 2105 } 2106 2107 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2108 /* 2109 backward sweep: 2110 b = b - x[i]*U^T(i,:), i=0,...,n-2 2111 for i=m-1,...,0: 2112 sum[i] = (b[i] - U(i,:)x )/d[i]; 2113 x[i] = (1-omega)x[i] + omega*sum[i]; 2114 */ 2115 /* if there was a forward sweep done above then I thing the next two for loops are not needed */ 2116 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 2117 2118 for (i=0; i<m-1; i++){ /* update rhs */ 2119 v = aa + ai[i] + 1; 2120 vj = aj + ai[i] + 1; 2121 nz = ai[i+1] - ai[i] - 1; 2122 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2123 while (nz--) t[*vj++] -= x[i]*(*v++); 2124 } 2125 for (i=m-1; i>=0; i--){ 2126 d = *(aa + ai[i]); 2127 v = aa + ai[i] + 1; 2128 vj = aj + ai[i] + 1; 2129 nz = ai[i+1] - ai[i] - 1; 2130 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2131 sum = t[i]; 2132 while (nz--) sum -= x[*vj++]*(*v++); 2133 x[i] = (1-omega)*x[i] + omega*sum/d; 2134 } 2135 } 2136 } 2137 2138 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2139 if (bb != xx) { 2140 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2141 } 2142 PetscFunctionReturn(0); 2143 } 2144 2145 #undef __FUNCT__ 2146 #define __FUNCT__ "MatCreateSeqSBAIJWithArrays" 2147 /*@ 2148 MatCreateSeqSBAIJWithArrays - Creates an sequential SBAIJ matrix using matrix elements 2149 (upper triangular entries in CSR format) provided by the user. 2150 2151 Collective on MPI_Comm 2152 2153 Input Parameters: 2154 + comm - must be an MPI communicator of size 1 2155 . bs - size of block 2156 . m - number of rows 2157 . n - number of columns 2158 . i - row indices 2159 . j - column indices 2160 - a - matrix values 2161 2162 Output Parameter: 2163 . mat - the matrix 2164 2165 Level: intermediate 2166 2167 Notes: 2168 The i, j, and a arrays are not copied by this routine, the user must free these arrays 2169 once the matrix is destroyed 2170 2171 You cannot set new nonzero locations into this matrix, that will generate an error. 2172 2173 The i and j indices are 0 based 2174 2175 .seealso: MatCreate(), MatCreateMPISBAIJ(), MatCreateSeqSBAIJ() 2176 2177 @*/ 2178 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 2179 { 2180 PetscErrorCode ierr; 2181 PetscInt ii; 2182 Mat_SeqSBAIJ *sbaij; 2183 2184 PetscFunctionBegin; 2185 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 2186 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2187 2188 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2189 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 2190 ierr = MatSetType(*mat,MATSEQSBAIJ);CHKERRQ(ierr); 2191 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 2192 sbaij = (Mat_SeqSBAIJ*)(*mat)->data; 2193 ierr = PetscMalloc2(m,PetscInt,&sbaij->imax,m,PetscInt,&sbaij->ilen);CHKERRQ(ierr); 2194 2195 sbaij->i = i; 2196 sbaij->j = j; 2197 sbaij->a = a; 2198 sbaij->singlemalloc = PETSC_FALSE; 2199 sbaij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 2200 sbaij->free_a = PETSC_FALSE; 2201 sbaij->free_ij = PETSC_FALSE; 2202 2203 for (ii=0; ii<m; ii++) { 2204 sbaij->ilen[ii] = sbaij->imax[ii] = i[ii+1] - i[ii]; 2205 #if defined(PETSC_USE_DEBUG) 2206 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 2207 #endif 2208 } 2209 #if defined(PETSC_USE_DEBUG) 2210 for (ii=0; ii<sbaij->i[m]; ii++) { 2211 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 2212 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 2213 } 2214 #endif 2215 2216 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2217 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2218 PetscFunctionReturn(0); 2219 } 2220 2221 2222 2223 2224 2225