1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/inline/spops.h" 9 #include "src/mat/impls/sbaij/seq/sbaij.h" 10 11 #define CHUNKSIZE 10 12 13 /* 14 Checks for missing diagonals 15 */ 16 #undef __FUNCT__ 17 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 18 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A) 19 { 20 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 21 PetscErrorCode ierr; 22 PetscInt *diag,*jj = a->j,i; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for (i=0; i<a->mbs; i++) { 28 if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i); 29 } 30 PetscFunctionReturn(0); 31 } 32 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 35 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A) 36 { 37 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 38 PetscErrorCode ierr; 39 PetscInt i,mbs = a->mbs; 40 41 PetscFunctionBegin; 42 if (a->diag) PetscFunctionReturn(0); 43 44 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 45 ierr = PetscLogObjectMemory(A,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 46 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 47 PetscFunctionReturn(0); 48 } 49 50 #undef __FUNCT__ 51 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 52 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 53 { 54 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 55 PetscInt n = a->mbs,i; 56 57 PetscFunctionBegin; 58 *nn = n; 59 if (!ia) PetscFunctionReturn(0); 60 61 if (oshift == 1) { 62 /* temporarily add 1 to i and j indices */ 63 PetscInt nz = a->i[n]; 64 for (i=0; i<nz; i++) a->j[i]++; 65 for (i=0; i<n+1; i++) a->i[i]++; 66 *ia = a->i; *ja = a->j; 67 } else { 68 *ia = a->i; *ja = a->j; 69 } 70 PetscFunctionReturn(0); 71 } 72 73 #undef __FUNCT__ 74 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 75 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 76 { 77 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 78 PetscInt i,n = a->mbs; 79 80 PetscFunctionBegin; 81 if (!ia) PetscFunctionReturn(0); 82 83 if (oshift == 1) { 84 PetscInt nz = a->i[n]-1; 85 for (i=0; i<nz; i++) a->j[i]--; 86 for (i=0; i<n+1; i++) a->i[i]--; 87 } 88 PetscFunctionReturn(0); 89 } 90 91 #undef __FUNCT__ 92 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 93 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A) 94 { 95 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 96 PetscErrorCode ierr; 97 98 PetscFunctionBegin; 99 #if defined(PETSC_USE_LOG) 100 PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->m,a->nz); 101 #endif 102 ierr = MatSeqXAIJFreeAIJ(a->singlemalloc,&a->a,&a->j,&a->i);CHKERRQ(ierr); 103 if (a->row) { 104 ierr = ISDestroy(a->row);CHKERRQ(ierr); 105 } 106 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 107 if (a->imax) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 108 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 109 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 110 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 111 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 112 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 113 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 114 115 if (a->inew){ 116 ierr = PetscFree(a->inew);CHKERRQ(ierr); 117 a->inew = 0; 118 } 119 ierr = PetscFree(a);CHKERRQ(ierr); 120 121 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 122 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 123 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 124 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 125 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr); 126 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 127 PetscFunctionReturn(0); 128 } 129 130 #undef __FUNCT__ 131 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 132 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op) 133 { 134 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 135 PetscErrorCode ierr; 136 137 PetscFunctionBegin; 138 switch (op) { 139 case MAT_ROW_ORIENTED: 140 a->roworiented = PETSC_TRUE; 141 break; 142 case MAT_COLUMN_ORIENTED: 143 a->roworiented = PETSC_FALSE; 144 break; 145 case MAT_COLUMNS_SORTED: 146 a->sorted = PETSC_TRUE; 147 break; 148 case MAT_COLUMNS_UNSORTED: 149 a->sorted = PETSC_FALSE; 150 break; 151 case MAT_KEEP_ZEROED_ROWS: 152 a->keepzeroedrows = PETSC_TRUE; 153 break; 154 case MAT_NO_NEW_NONZERO_LOCATIONS: 155 a->nonew = 1; 156 break; 157 case MAT_NEW_NONZERO_LOCATION_ERR: 158 a->nonew = -1; 159 break; 160 case MAT_NEW_NONZERO_ALLOCATION_ERR: 161 a->nonew = -2; 162 break; 163 case MAT_YES_NEW_NONZERO_LOCATIONS: 164 a->nonew = 0; 165 break; 166 case MAT_ROWS_SORTED: 167 case MAT_ROWS_UNSORTED: 168 case MAT_YES_NEW_DIAGONALS: 169 case MAT_IGNORE_OFF_PROC_ENTRIES: 170 case MAT_USE_HASH_TABLE: 171 ierr = PetscVerboseInfo((A,"MatSetOption_SeqSBAIJ:Option ignored\n"));CHKERRQ(ierr); 172 break; 173 case MAT_NO_NEW_DIAGONALS: 174 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 175 case MAT_NOT_SYMMETRIC: 176 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 177 case MAT_HERMITIAN: 178 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 179 case MAT_SYMMETRIC: 180 case MAT_STRUCTURALLY_SYMMETRIC: 181 case MAT_NOT_HERMITIAN: 182 case MAT_SYMMETRY_ETERNAL: 183 case MAT_NOT_SYMMETRY_ETERNAL: 184 case MAT_IGNORE_LOWER_TRIANGULAR: 185 a->ignore_ltriangular = PETSC_TRUE; 186 break; 187 case MAT_ERROR_LOWER_TRIANGULAR: 188 a->ignore_ltriangular = PETSC_FALSE; 189 break; 190 case MAT_GETROW_UPPERTRIANGULAR: 191 a->getrow_utriangular = PETSC_TRUE; 192 break; 193 default: 194 SETERRQ(PETSC_ERR_SUP,"unknown option"); 195 } 196 PetscFunctionReturn(0); 197 } 198 199 #undef __FUNCT__ 200 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 201 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v) 202 { 203 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 204 PetscErrorCode ierr; 205 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 206 MatScalar *aa,*aa_i; 207 PetscScalar *v_i; 208 209 PetscFunctionBegin; 210 if (A && !a->getrow_utriangular) SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format. Getting the upper triangular part of row, run with -mat_getrow_uppertriangular, call MatSetOption(mat,MAT_GETROW_UPPERTRIANGULAR) or MatGetRowUpperTriangular()"); 211 /* Get the upper triangular part of the row */ 212 bs = A->bs; 213 ai = a->i; 214 aj = a->j; 215 aa = a->a; 216 bs2 = a->bs2; 217 218 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row); 219 220 bn = row/bs; /* Block number */ 221 bp = row % bs; /* Block position */ 222 M = ai[bn+1] - ai[bn]; 223 *ncols = bs*M; 224 225 if (v) { 226 *v = 0; 227 if (*ncols) { 228 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 229 for (i=0; i<M; i++) { /* for each block in the block row */ 230 v_i = *v + i*bs; 231 aa_i = aa + bs2*(ai[bn] + i); 232 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 233 } 234 } 235 } 236 237 if (cols) { 238 *cols = 0; 239 if (*ncols) { 240 ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr); 241 for (i=0; i<M; i++) { /* for each block in the block row */ 242 cols_i = *cols + i*bs; 243 itmp = bs*aj[ai[bn] + i]; 244 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 245 } 246 } 247 } 248 249 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 250 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 251 #ifdef column_search 252 v_i = *v + M*bs; 253 cols_i = *cols + M*bs; 254 for (i=0; i<bn; i++){ /* for each block row */ 255 M = ai[i+1] - ai[i]; 256 for (j=0; j<M; j++){ 257 itmp = aj[ai[i] + j]; /* block column value */ 258 if (itmp == bn){ 259 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 260 for (k=0; k<bs; k++) { 261 *cols_i++ = i*bs+k; 262 *v_i++ = aa_i[k]; 263 } 264 *ncols += bs; 265 break; 266 } 267 } 268 } 269 #endif 270 PetscFunctionReturn(0); 271 } 272 273 #undef __FUNCT__ 274 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 275 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 276 { 277 PetscErrorCode ierr; 278 279 PetscFunctionBegin; 280 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 281 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 282 PetscFunctionReturn(0); 283 } 284 285 #undef __FUNCT__ 286 #define __FUNCT__ "MatGetRowUpperTriangular_SeqSBAIJ" 287 PetscErrorCode MatGetRowUpperTriangular_SeqSBAIJ(Mat A) 288 { 289 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 290 291 PetscFunctionBegin; 292 a->getrow_utriangular = PETSC_TRUE; 293 PetscFunctionReturn(0); 294 } 295 #undef __FUNCT__ 296 #define __FUNCT__ "MatRestoreRowUpperTriangular_SeqSBAIJ" 297 PetscErrorCode MatRestoreRowUpperTriangular_SeqSBAIJ(Mat A) 298 { 299 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 300 301 PetscFunctionBegin; 302 a->getrow_utriangular = PETSC_FALSE; 303 PetscFunctionReturn(0); 304 } 305 306 #undef __FUNCT__ 307 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 308 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B) 309 { 310 PetscErrorCode ierr; 311 PetscFunctionBegin; 312 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 313 PetscFunctionReturn(0); 314 } 315 316 #undef __FUNCT__ 317 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 318 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 319 { 320 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 321 PetscErrorCode ierr; 322 PetscInt i,j,bs = A->bs,k,l,bs2=a->bs2; 323 const char *name; 324 PetscViewerFormat format; 325 326 PetscFunctionBegin; 327 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 328 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 329 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 330 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 331 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 332 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 333 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 334 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 335 for (i=0; i<a->mbs; i++) { 336 for (j=0; j<bs; j++) { 337 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 338 for (k=a->i[i]; k<a->i[i+1]; k++) { 339 for (l=0; l<bs; l++) { 340 #if defined(PETSC_USE_COMPLEX) 341 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 342 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 343 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 344 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 345 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 346 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 347 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 348 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 349 } 350 #else 351 if (a->a[bs2*k + l*bs + j] != 0.0) { 352 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 353 } 354 #endif 355 } 356 } 357 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 358 } 359 } 360 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 361 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 362 PetscFunctionReturn(0); 363 } else { 364 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 365 for (i=0; i<a->mbs; i++) { 366 for (j=0; j<bs; j++) { 367 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 368 for (k=a->i[i]; k<a->i[i+1]; k++) { 369 for (l=0; l<bs; l++) { 370 #if defined(PETSC_USE_COMPLEX) 371 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 372 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 373 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 374 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 375 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 376 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 377 } else { 378 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 379 } 380 #else 381 ierr = PetscViewerASCIIPrintf(viewer," %D %G ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 382 #endif 383 } 384 } 385 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 386 } 387 } 388 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 389 } 390 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 391 PetscFunctionReturn(0); 392 } 393 394 #undef __FUNCT__ 395 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 396 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 397 { 398 Mat A = (Mat) Aa; 399 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 400 PetscErrorCode ierr; 401 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2; 402 PetscMPIInt rank; 403 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 404 MatScalar *aa; 405 MPI_Comm comm; 406 PetscViewer viewer; 407 408 PetscFunctionBegin; 409 /* 410 This is nasty. If this is called from an originally parallel matrix 411 then all processes call this,but only the first has the matrix so the 412 rest should return immediately. 413 */ 414 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 415 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 416 if (rank) PetscFunctionReturn(0); 417 418 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 419 420 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 421 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 422 423 /* loop over matrix elements drawing boxes */ 424 color = PETSC_DRAW_BLUE; 425 for (i=0,row=0; i<mbs; i++,row+=bs) { 426 for (j=a->i[i]; j<a->i[i+1]; j++) { 427 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 428 x_l = a->j[j]*bs; x_r = x_l + 1.0; 429 aa = a->a + j*bs2; 430 for (k=0; k<bs; k++) { 431 for (l=0; l<bs; l++) { 432 if (PetscRealPart(*aa++) >= 0.) continue; 433 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 434 } 435 } 436 } 437 } 438 color = PETSC_DRAW_CYAN; 439 for (i=0,row=0; i<mbs; i++,row+=bs) { 440 for (j=a->i[i]; j<a->i[i+1]; j++) { 441 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 442 x_l = a->j[j]*bs; x_r = x_l + 1.0; 443 aa = a->a + j*bs2; 444 for (k=0; k<bs; k++) { 445 for (l=0; l<bs; l++) { 446 if (PetscRealPart(*aa++) != 0.) continue; 447 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 448 } 449 } 450 } 451 } 452 453 color = PETSC_DRAW_RED; 454 for (i=0,row=0; i<mbs; i++,row+=bs) { 455 for (j=a->i[i]; j<a->i[i+1]; j++) { 456 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 457 x_l = a->j[j]*bs; x_r = x_l + 1.0; 458 aa = a->a + j*bs2; 459 for (k=0; k<bs; k++) { 460 for (l=0; l<bs; l++) { 461 if (PetscRealPart(*aa++) <= 0.) continue; 462 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 463 } 464 } 465 } 466 } 467 PetscFunctionReturn(0); 468 } 469 470 #undef __FUNCT__ 471 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 472 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 473 { 474 PetscErrorCode ierr; 475 PetscReal xl,yl,xr,yr,w,h; 476 PetscDraw draw; 477 PetscTruth isnull; 478 479 PetscFunctionBegin; 480 481 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 482 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 483 484 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 485 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 486 xr += w; yr += h; xl = -w; yl = -h; 487 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 488 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 489 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 490 PetscFunctionReturn(0); 491 } 492 493 #undef __FUNCT__ 494 #define __FUNCT__ "MatView_SeqSBAIJ" 495 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 496 { 497 PetscErrorCode ierr; 498 PetscTruth iascii,isdraw; 499 500 PetscFunctionBegin; 501 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 502 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 503 if (iascii){ 504 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 505 } else if (isdraw) { 506 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 507 } else { 508 Mat B; 509 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 510 ierr = MatView(B,viewer);CHKERRQ(ierr); 511 ierr = MatDestroy(B);CHKERRQ(ierr); 512 } 513 PetscFunctionReturn(0); 514 } 515 516 517 #undef __FUNCT__ 518 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 519 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 520 { 521 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 522 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 523 PetscInt *ai = a->i,*ailen = a->ilen; 524 PetscInt brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2; 525 MatScalar *ap,*aa = a->a,zero = 0.0; 526 527 PetscFunctionBegin; 528 for (k=0; k<m; k++) { /* loop over rows */ 529 row = im[k]; brow = row/bs; 530 if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); 531 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 532 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 533 nrow = ailen[brow]; 534 for (l=0; l<n; l++) { /* loop over columns */ 535 if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); 536 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1); 537 col = in[l] ; 538 bcol = col/bs; 539 cidx = col%bs; 540 ridx = row%bs; 541 high = nrow; 542 low = 0; /* assume unsorted */ 543 while (high-low > 5) { 544 t = (low+high)/2; 545 if (rp[t] > bcol) high = t; 546 else low = t; 547 } 548 for (i=low; i<high; i++) { 549 if (rp[i] > bcol) break; 550 if (rp[i] == bcol) { 551 *v++ = ap[bs2*i+bs*cidx+ridx]; 552 goto finished; 553 } 554 } 555 *v++ = zero; 556 finished:; 557 } 558 } 559 PetscFunctionReturn(0); 560 } 561 562 563 #undef __FUNCT__ 564 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 565 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 566 { 567 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 568 PetscErrorCode ierr; 569 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 570 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 571 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval; 572 PetscTruth roworiented=a->roworiented; 573 const MatScalar *value = v; 574 MatScalar *ap,*aa = a->a,*bap; 575 576 PetscFunctionBegin; 577 if (roworiented) { 578 stepval = (n-1)*bs; 579 } else { 580 stepval = (m-1)*bs; 581 } 582 for (k=0; k<m; k++) { /* loop over added rows */ 583 row = im[k]; 584 if (row < 0) continue; 585 #if defined(PETSC_USE_DEBUG) 586 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 587 #endif 588 rp = aj + ai[row]; 589 ap = aa + bs2*ai[row]; 590 rmax = imax[row]; 591 nrow = ailen[row]; 592 low = 0; 593 high = nrow; 594 for (l=0; l<n; l++) { /* loop over added columns */ 595 if (in[l] < 0) continue; 596 col = in[l]; 597 #if defined(PETSC_USE_DEBUG) 598 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1); 599 #endif 600 if (col < row) continue; /* ignore lower triangular block */ 601 if (roworiented) { 602 value = v + k*(stepval+bs)*bs + l*bs; 603 } else { 604 value = v + l*(stepval+bs)*bs + k*bs; 605 } 606 if (col <= lastcol) low = 0; else high = nrow; 607 lastcol = col; 608 while (high-low > 7) { 609 t = (low+high)/2; 610 if (rp[t] > col) high = t; 611 else low = t; 612 } 613 for (i=low; i<high; i++) { 614 if (rp[i] > col) break; 615 if (rp[i] == col) { 616 bap = ap + bs2*i; 617 if (roworiented) { 618 if (is == ADD_VALUES) { 619 for (ii=0; ii<bs; ii++,value+=stepval) { 620 for (jj=ii; jj<bs2; jj+=bs) { 621 bap[jj] += *value++; 622 } 623 } 624 } else { 625 for (ii=0; ii<bs; ii++,value+=stepval) { 626 for (jj=ii; jj<bs2; jj+=bs) { 627 bap[jj] = *value++; 628 } 629 } 630 } 631 } else { 632 if (is == ADD_VALUES) { 633 for (ii=0; ii<bs; ii++,value+=stepval) { 634 for (jj=0; jj<bs; jj++) { 635 *bap++ += *value++; 636 } 637 } 638 } else { 639 for (ii=0; ii<bs; ii++,value+=stepval) { 640 for (jj=0; jj<bs; jj++) { 641 *bap++ = *value++; 642 } 643 } 644 } 645 } 646 goto noinsert2; 647 } 648 } 649 if (nonew == 1) goto noinsert2; 650 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 651 MatSeqXAIJReallocateAIJ(a,bs2,nrow,row,col,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew); 652 N = nrow++ - 1; high++; 653 /* shift up all the later entries in this row */ 654 for (ii=N; ii>=i; ii--) { 655 rp[ii+1] = rp[ii]; 656 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 657 } 658 if (N >= i) { 659 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 660 } 661 rp[i] = col; 662 bap = ap + bs2*i; 663 if (roworiented) { 664 for (ii=0; ii<bs; ii++,value+=stepval) { 665 for (jj=ii; jj<bs2; jj+=bs) { 666 bap[jj] = *value++; 667 } 668 } 669 } else { 670 for (ii=0; ii<bs; ii++,value+=stepval) { 671 for (jj=0; jj<bs; jj++) { 672 *bap++ = *value++; 673 } 674 } 675 } 676 noinsert2:; 677 low = i; 678 } 679 ailen[row] = nrow; 680 } 681 PetscFunctionReturn(0); 682 } 683 684 #undef __FUNCT__ 685 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 686 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 687 { 688 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 689 PetscErrorCode ierr; 690 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 691 PetscInt m = A->m,*ip,N,*ailen = a->ilen; 692 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 693 MatScalar *aa = a->a,*ap; 694 695 PetscFunctionBegin; 696 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 697 698 if (m) rmax = ailen[0]; 699 for (i=1; i<mbs; i++) { 700 /* move each row back by the amount of empty slots (fshift) before it*/ 701 fshift += imax[i-1] - ailen[i-1]; 702 rmax = PetscMax(rmax,ailen[i]); 703 if (fshift) { 704 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 705 N = ailen[i]; 706 for (j=0; j<N; j++) { 707 ip[j-fshift] = ip[j]; 708 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 709 } 710 } 711 ai[i] = ai[i-1] + ailen[i-1]; 712 } 713 if (mbs) { 714 fshift += imax[mbs-1] - ailen[mbs-1]; 715 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 716 } 717 /* reset ilen and imax for each row */ 718 for (i=0; i<mbs; i++) { 719 ailen[i] = imax[i] = ai[i+1] - ai[i]; 720 } 721 a->nz = ai[mbs]; 722 723 /* diagonals may have moved, reset it */ 724 if (a->diag) { 725 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 726 } 727 ierr = PetscVerboseInfo((A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n", 728 m,A->m,A->bs,fshift*bs2,a->nz*bs2));CHKERRQ(ierr); 729 ierr = PetscVerboseInfo((A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs));CHKERRQ(ierr); 730 ierr = PetscVerboseInfo((A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %D\n",rmax));CHKERRQ(ierr); 731 a->reallocs = 0; 732 A->info.nz_unneeded = (PetscReal)fshift*bs2; 733 PetscFunctionReturn(0); 734 } 735 736 /* 737 This function returns an array of flags which indicate the locations of contiguous 738 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 739 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 740 Assume: sizes should be long enough to hold all the values. 741 */ 742 #undef __FUNCT__ 743 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 744 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 745 { 746 PetscInt i,j,k,row; 747 PetscTruth flg; 748 749 PetscFunctionBegin; 750 for (i=0,j=0; i<n; j++) { 751 row = idx[i]; 752 if (row%bs!=0) { /* Not the begining of a block */ 753 sizes[j] = 1; 754 i++; 755 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 756 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 757 i++; 758 } else { /* Begining of the block, so check if the complete block exists */ 759 flg = PETSC_TRUE; 760 for (k=1; k<bs; k++) { 761 if (row+k != idx[i+k]) { /* break in the block */ 762 flg = PETSC_FALSE; 763 break; 764 } 765 } 766 if (flg) { /* No break in the bs */ 767 sizes[j] = bs; 768 i+= bs; 769 } else { 770 sizes[j] = 1; 771 i++; 772 } 773 } 774 } 775 *bs_max = j; 776 PetscFunctionReturn(0); 777 } 778 779 780 /* Only add/insert a(i,j) with i<=j (blocks). 781 Any a(i,j) with i>j input by user is ingored. 782 */ 783 784 #undef __FUNCT__ 785 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 786 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 787 { 788 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 789 PetscErrorCode ierr; 790 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 791 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 792 PetscInt *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol; 793 PetscInt ridx,cidx,bs2=a->bs2; 794 MatScalar *ap,value,*aa=a->a,*bap; 795 796 PetscFunctionBegin; 797 for (k=0; k<m; k++) { /* loop over added rows */ 798 row = im[k]; /* row number */ 799 brow = row/bs; /* block row number */ 800 if (row < 0) continue; 801 #if defined(PETSC_USE_DEBUG) 802 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 803 #endif 804 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 805 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 806 rmax = imax[brow]; /* maximum space allocated for this row */ 807 nrow = ailen[brow]; /* actual length of this row */ 808 low = 0; 809 810 for (l=0; l<n; l++) { /* loop over added columns */ 811 if (in[l] < 0) continue; 812 #if defined(PETSC_USE_DEBUG) 813 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->m-1); 814 #endif 815 col = in[l]; 816 bcol = col/bs; /* block col number */ 817 818 if (brow > bcol) { 819 if (a->ignore_ltriangular){ 820 continue; /* ignore lower triangular values */ 821 } else { 822 SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR)"); 823 } 824 } 825 826 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 827 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 828 /* element value a(k,l) */ 829 if (roworiented) { 830 value = v[l + k*n]; 831 } else { 832 value = v[k + l*m]; 833 } 834 835 /* move pointer bap to a(k,l) quickly and add/insert value */ 836 if (col <= lastcol) low = 0; high = nrow; 837 lastcol = col; 838 while (high-low > 7) { 839 t = (low+high)/2; 840 if (rp[t] > bcol) high = t; 841 else low = t; 842 } 843 for (i=low; i<high; i++) { 844 if (rp[i] > bcol) break; 845 if (rp[i] == bcol) { 846 bap = ap + bs2*i + bs*cidx + ridx; 847 if (is == ADD_VALUES) *bap += value; 848 else *bap = value; 849 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 850 if (brow == bcol && ridx < cidx){ 851 bap = ap + bs2*i + bs*ridx + cidx; 852 if (is == ADD_VALUES) *bap += value; 853 else *bap = value; 854 } 855 goto noinsert1; 856 } 857 } 858 859 if (nonew == 1) goto noinsert1; 860 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 861 MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew); 862 863 N = nrow++ - 1; high++; 864 /* shift up all the later entries in this row */ 865 for (ii=N; ii>=i; ii--) { 866 rp[ii+1] = rp[ii]; 867 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 868 } 869 if (N>=i) { 870 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 871 } 872 rp[i] = bcol; 873 ap[bs2*i + bs*cidx + ridx] = value; 874 noinsert1:; 875 low = i; 876 } 877 } /* end of loop over added columns */ 878 ailen[brow] = nrow; 879 } /* end of loop over added rows */ 880 PetscFunctionReturn(0); 881 } 882 883 EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*); 884 EXTERN PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*); 885 EXTERN PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat,PetscInt,IS[],PetscInt); 886 EXTERN PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,PetscInt,MatReuse,Mat*); 887 EXTERN PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*[]); 888 EXTERN PetscErrorCode MatScale_SeqSBAIJ(Mat,PetscScalar); 889 EXTERN PetscErrorCode MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 890 EXTERN PetscErrorCode MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 891 EXTERN PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat,Vec); 892 EXTERN PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 893 EXTERN PetscErrorCode MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 894 EXTERN PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat); 895 EXTERN PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat,Vec); 896 897 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 898 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 899 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 900 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 901 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 902 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 903 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 904 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 905 906 EXTERN PetscErrorCode MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs); 907 908 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 909 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 910 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 911 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 912 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 913 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 914 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 915 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 916 917 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,MatFactorInfo*,Mat*); 918 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,MatFactorInfo*,Mat*); 919 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,MatFactorInfo*,Mat*); 920 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,MatFactorInfo*,Mat*); 921 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,MatFactorInfo*,Mat*); 922 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,MatFactorInfo*,Mat*); 923 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,MatFactorInfo*,Mat*); 924 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,MatFactorInfo*,Mat*); 925 EXTERN PetscErrorCode MatGetInertia_SeqSBAIJ(Mat,PetscInt*,PetscInt*,PetscInt*); 926 927 EXTERN PetscErrorCode MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 928 EXTERN PetscErrorCode MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 929 EXTERN PetscErrorCode MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 930 EXTERN PetscErrorCode MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 931 EXTERN PetscErrorCode MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 932 EXTERN PetscErrorCode MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 933 EXTERN PetscErrorCode MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 934 EXTERN PetscErrorCode MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 935 936 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 937 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 938 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 939 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 940 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 941 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 942 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 943 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 944 945 #ifdef HAVE_MatICCFactor 946 /* modified from MatILUFactor_SeqSBAIJ, needs further work! */ 947 #undef __FUNCT__ 948 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 949 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 950 { 951 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 952 Mat outA; 953 PetscErrorCode ierr; 954 PetscTruth row_identity,col_identity; 955 956 PetscFunctionBegin; 957 outA = inA; 958 inA->factor = FACTOR_CHOLESKY; 959 960 if (!a->diag) { 961 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 962 } 963 /* 964 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 965 for ILU(0) factorization with natural ordering 966 */ 967 switch (a->bs) { 968 case 1: 969 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 970 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 971 inA->ops->solves = MatSolves_SeqSBAIJ_1; 972 ierr = PetscLoginfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"));CHKERRQ(ierr); 973 case 2: 974 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 975 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 976 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 977 ierr = PetscVerboseInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"));CHKERRQ(ierr); 978 break; 979 case 3: 980 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 981 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 982 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 983 ierr = PetscVerboseInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"));CHKERRQ(ierr); 984 break; 985 case 4: 986 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 987 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 988 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 989 ierr = PetscVerboseInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"));CHKERRQ(ierr); 990 break; 991 case 5: 992 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 993 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 994 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 995 ierr = PetscVerboseInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"));CHKERRQ(ierr); 996 break; 997 case 6: 998 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 999 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1000 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1001 ierr = PetscVerboseInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"));CHKERRQ(ierr); 1002 break; 1003 case 7: 1004 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1005 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1006 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1007 ierr = PetscVerboseInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"));CHKERRQ(ierr); 1008 break; 1009 default: 1010 a->row = row; 1011 a->icol = col; 1012 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1013 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1014 1015 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1016 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1017 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1018 1019 if (!a->solve_work) { 1020 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1021 ierr = PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1022 } 1023 } 1024 1025 ierr = MatCholeskyFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1026 PetscFunctionReturn(0); 1027 } 1028 #endif 1029 1030 #undef __FUNCT__ 1031 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1032 PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat A) 1033 { 1034 static PetscTruth called = PETSC_FALSE; 1035 MPI_Comm comm = A->comm; 1036 PetscErrorCode ierr; 1037 1038 PetscFunctionBegin; 1039 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1040 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1041 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1042 ierr = (*PetscHelpPrintf)(comm," -mat_ignore_lower_triangular: Ignore lower triangular values set by user\n");CHKERRQ(ierr); 1043 ierr = (*PetscHelpPrintf)(comm," -mat_getrow_uppertriangular: Enable MatGetRow() for retrieving the upper triangular part of the row\n");CHKERRQ(ierr); 1044 PetscFunctionReturn(0); 1045 } 1046 1047 EXTERN_C_BEGIN 1048 #undef __FUNCT__ 1049 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1050 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices) 1051 { 1052 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1053 PetscInt i,nz,n; 1054 1055 PetscFunctionBegin; 1056 nz = baij->maxnz; 1057 n = mat->n; 1058 for (i=0; i<nz; i++) { 1059 baij->j[i] = indices[i]; 1060 } 1061 baij->nz = nz; 1062 for (i=0; i<n; i++) { 1063 baij->ilen[i] = baij->imax[i]; 1064 } 1065 PetscFunctionReturn(0); 1066 } 1067 EXTERN_C_END 1068 1069 #undef __FUNCT__ 1070 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1071 /*@ 1072 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1073 in the matrix. 1074 1075 Input Parameters: 1076 + mat - the SeqSBAIJ matrix 1077 - indices - the column indices 1078 1079 Level: advanced 1080 1081 Notes: 1082 This can be called if you have precomputed the nonzero structure of the 1083 matrix and want to provide it to the matrix object to improve the performance 1084 of the MatSetValues() operation. 1085 1086 You MUST have set the correct numbers of nonzeros per row in the call to 1087 MatCreateSeqSBAIJ(), and the columns indices MUST be sorted. 1088 1089 MUST be called before any calls to MatSetValues() 1090 1091 .seealso: MatCreateSeqSBAIJ 1092 @*/ 1093 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1094 { 1095 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1096 1097 PetscFunctionBegin; 1098 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1099 PetscValidPointer(indices,2); 1100 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1101 if (f) { 1102 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1103 } else { 1104 SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices"); 1105 } 1106 PetscFunctionReturn(0); 1107 } 1108 1109 #undef __FUNCT__ 1110 #define __FUNCT__ "MatCopy_SeqSBAIJ" 1111 PetscErrorCode MatCopy_SeqSBAIJ(Mat A,Mat B,MatStructure str) 1112 { 1113 PetscErrorCode ierr; 1114 1115 PetscFunctionBegin; 1116 /* If the two matrices have the same copy implementation, use fast copy. */ 1117 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1118 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1119 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1120 1121 if (a->i[A->m] != b->i[B->m]) { 1122 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1123 } 1124 ierr = PetscMemcpy(b->a,a->a,(a->i[A->m])*sizeof(PetscScalar));CHKERRQ(ierr); 1125 } else { 1126 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1127 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1128 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1129 } 1130 PetscFunctionReturn(0); 1131 } 1132 1133 #undef __FUNCT__ 1134 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1135 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A) 1136 { 1137 PetscErrorCode ierr; 1138 1139 PetscFunctionBegin; 1140 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1141 PetscFunctionReturn(0); 1142 } 1143 1144 #undef __FUNCT__ 1145 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1146 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1147 { 1148 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1149 PetscFunctionBegin; 1150 *array = a->a; 1151 PetscFunctionReturn(0); 1152 } 1153 1154 #undef __FUNCT__ 1155 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1156 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1157 { 1158 PetscFunctionBegin; 1159 PetscFunctionReturn(0); 1160 } 1161 1162 #include "petscblaslapack.h" 1163 #undef __FUNCT__ 1164 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1165 PetscErrorCode MatAXPY_SeqSBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1166 { 1167 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1168 PetscErrorCode ierr; 1169 PetscInt i,bs=Y->bs,bs2,j; 1170 PetscBLASInt bnz = (PetscBLASInt)x->nz,one = 1; 1171 1172 PetscFunctionBegin; 1173 if (str == SAME_NONZERO_PATTERN) { 1174 PetscScalar alpha = a; 1175 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1176 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1177 if (y->xtoy && y->XtoY != X) { 1178 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1179 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1180 } 1181 if (!y->xtoy) { /* get xtoy */ 1182 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1183 y->XtoY = X; 1184 } 1185 bs2 = bs*bs; 1186 for (i=0; i<x->nz; i++) { 1187 j = 0; 1188 while (j < bs2){ 1189 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 1190 j++; 1191 } 1192 } 1193 ierr = PetscVerboseInfo((0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr); 1194 } else { 1195 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1196 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1197 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1198 } 1199 PetscFunctionReturn(0); 1200 } 1201 1202 #undef __FUNCT__ 1203 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1204 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1205 { 1206 PetscFunctionBegin; 1207 *flg = PETSC_TRUE; 1208 PetscFunctionReturn(0); 1209 } 1210 1211 #undef __FUNCT__ 1212 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1213 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1214 { 1215 PetscFunctionBegin; 1216 *flg = PETSC_TRUE; 1217 PetscFunctionReturn(0); 1218 } 1219 1220 #undef __FUNCT__ 1221 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1222 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg) 1223 { 1224 PetscFunctionBegin; 1225 *flg = PETSC_FALSE; 1226 PetscFunctionReturn(0); 1227 } 1228 1229 #undef __FUNCT__ 1230 #define __FUNCT__ "MatRealPart_SeqSBAIJ" 1231 PetscErrorCode MatRealPart_SeqSBAIJ(Mat A) 1232 { 1233 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1234 PetscInt i,nz = a->bs2*a->i[a->mbs]; 1235 PetscScalar *aa = a->a; 1236 1237 PetscFunctionBegin; 1238 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 1239 PetscFunctionReturn(0); 1240 } 1241 1242 #undef __FUNCT__ 1243 #define __FUNCT__ "MatImaginaryPart_SeqSBAIJ" 1244 PetscErrorCode MatImaginaryPart_SeqSBAIJ(Mat A) 1245 { 1246 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1247 PetscInt i,nz = a->bs2*a->i[a->mbs]; 1248 PetscScalar *aa = a->a; 1249 1250 PetscFunctionBegin; 1251 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 1252 PetscFunctionReturn(0); 1253 } 1254 1255 /* -------------------------------------------------------------------*/ 1256 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1257 MatGetRow_SeqSBAIJ, 1258 MatRestoreRow_SeqSBAIJ, 1259 MatMult_SeqSBAIJ_N, 1260 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1261 MatMult_SeqSBAIJ_N, /* transpose versions are same as non-transpose versions */ 1262 MatMultAdd_SeqSBAIJ_N, 1263 MatSolve_SeqSBAIJ_N, 1264 0, 1265 0, 1266 /*10*/ 0, 1267 0, 1268 MatCholeskyFactor_SeqSBAIJ, 1269 MatRelax_SeqSBAIJ, 1270 MatTranspose_SeqSBAIJ, 1271 /*15*/ MatGetInfo_SeqSBAIJ, 1272 MatEqual_SeqSBAIJ, 1273 MatGetDiagonal_SeqSBAIJ, 1274 MatDiagonalScale_SeqSBAIJ, 1275 MatNorm_SeqSBAIJ, 1276 /*20*/ 0, 1277 MatAssemblyEnd_SeqSBAIJ, 1278 0, 1279 MatSetOption_SeqSBAIJ, 1280 MatZeroEntries_SeqSBAIJ, 1281 /*25*/ 0, 1282 0, 1283 0, 1284 MatCholeskyFactorSymbolic_SeqSBAIJ, 1285 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1286 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1287 0, 1288 MatICCFactorSymbolic_SeqSBAIJ, 1289 MatGetArray_SeqSBAIJ, 1290 MatRestoreArray_SeqSBAIJ, 1291 /*35*/ MatDuplicate_SeqSBAIJ, 1292 0, 1293 0, 1294 0, 1295 0, 1296 /*40*/ MatAXPY_SeqSBAIJ, 1297 MatGetSubMatrices_SeqSBAIJ, 1298 MatIncreaseOverlap_SeqSBAIJ, 1299 MatGetValues_SeqSBAIJ, 1300 MatCopy_SeqSBAIJ, 1301 /*45*/ MatPrintHelp_SeqSBAIJ, 1302 MatScale_SeqSBAIJ, 1303 0, 1304 0, 1305 0, 1306 /*50*/ 0, 1307 MatGetRowIJ_SeqSBAIJ, 1308 MatRestoreRowIJ_SeqSBAIJ, 1309 0, 1310 0, 1311 /*55*/ 0, 1312 0, 1313 0, 1314 0, 1315 MatSetValuesBlocked_SeqSBAIJ, 1316 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1317 0, 1318 0, 1319 MatGetPetscMaps_Petsc, 1320 0, 1321 /*65*/ 0, 1322 0, 1323 0, 1324 0, 1325 0, 1326 /*70*/ MatGetRowMax_SeqSBAIJ, 1327 0, 1328 0, 1329 0, 1330 0, 1331 /*75*/ 0, 1332 0, 1333 0, 1334 0, 1335 0, 1336 /*80*/ 0, 1337 0, 1338 0, 1339 #if !defined(PETSC_USE_COMPLEX) 1340 MatGetInertia_SeqSBAIJ, 1341 #else 1342 0, 1343 #endif 1344 MatLoad_SeqSBAIJ, 1345 /*85*/ MatIsSymmetric_SeqSBAIJ, 1346 MatIsHermitian_SeqSBAIJ, 1347 MatIsStructurallySymmetric_SeqSBAIJ, 1348 0, 1349 0, 1350 /*90*/ 0, 1351 0, 1352 0, 1353 0, 1354 0, 1355 /*95*/ 0, 1356 0, 1357 0, 1358 0, 1359 0, 1360 /*100*/0, 1361 0, 1362 0, 1363 0, 1364 0, 1365 /*105*/0, 1366 MatRealPart_SeqSBAIJ, 1367 MatImaginaryPart_SeqSBAIJ, 1368 MatGetRowUpperTriangular_SeqSBAIJ, 1369 MatRestoreRowUpperTriangular_SeqSBAIJ 1370 }; 1371 1372 EXTERN_C_BEGIN 1373 #undef __FUNCT__ 1374 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1375 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqSBAIJ(Mat mat) 1376 { 1377 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1378 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1379 PetscErrorCode ierr; 1380 1381 PetscFunctionBegin; 1382 if (aij->nonew != 1) { 1383 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1384 } 1385 1386 /* allocate space for values if not already there */ 1387 if (!aij->saved_values) { 1388 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1389 } 1390 1391 /* copy values over */ 1392 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1393 PetscFunctionReturn(0); 1394 } 1395 EXTERN_C_END 1396 1397 EXTERN_C_BEGIN 1398 #undef __FUNCT__ 1399 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1400 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqSBAIJ(Mat mat) 1401 { 1402 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1403 PetscErrorCode ierr; 1404 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1405 1406 PetscFunctionBegin; 1407 if (aij->nonew != 1) { 1408 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1409 } 1410 if (!aij->saved_values) { 1411 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1412 } 1413 1414 /* copy values over */ 1415 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1416 PetscFunctionReturn(0); 1417 } 1418 EXTERN_C_END 1419 1420 EXTERN_C_BEGIN 1421 #undef __FUNCT__ 1422 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1423 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 1424 { 1425 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1426 PetscErrorCode ierr; 1427 PetscInt i,mbs,bs2; 1428 PetscTruth skipallocation = PETSC_FALSE,flg; 1429 1430 PetscFunctionBegin; 1431 B->preallocated = PETSC_TRUE; 1432 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1433 mbs = B->m/bs; 1434 bs2 = bs*bs; 1435 1436 if (mbs*bs != B->m) { 1437 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1438 } 1439 1440 if (nz == MAT_SKIP_ALLOCATION) { 1441 skipallocation = PETSC_TRUE; 1442 nz = 0; 1443 } 1444 1445 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1446 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 1447 if (nnz) { 1448 for (i=0; i<mbs; i++) { 1449 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 1450 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs); 1451 } 1452 } 1453 1454 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1455 if (!flg) { 1456 switch (bs) { 1457 case 1: 1458 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1459 B->ops->solve = MatSolve_SeqSBAIJ_1; 1460 B->ops->solves = MatSolves_SeqSBAIJ_1; 1461 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1462 B->ops->mult = MatMult_SeqSBAIJ_1; 1463 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1464 B->ops->multtranspose = MatMult_SeqSBAIJ_1; 1465 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_1; 1466 break; 1467 case 2: 1468 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1469 B->ops->solve = MatSolve_SeqSBAIJ_2; 1470 B->ops->solvetranspose = MatSolve_SeqSBAIJ_2; 1471 B->ops->mult = MatMult_SeqSBAIJ_2; 1472 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1473 B->ops->multtranspose = MatMult_SeqSBAIJ_2; 1474 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_2; 1475 break; 1476 case 3: 1477 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1478 B->ops->solve = MatSolve_SeqSBAIJ_3; 1479 B->ops->solvetranspose = MatSolve_SeqSBAIJ_3; 1480 B->ops->mult = MatMult_SeqSBAIJ_3; 1481 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1482 B->ops->multtranspose = MatMult_SeqSBAIJ_3; 1483 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_3; 1484 break; 1485 case 4: 1486 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1487 B->ops->solve = MatSolve_SeqSBAIJ_4; 1488 B->ops->solvetranspose = MatSolve_SeqSBAIJ_4; 1489 B->ops->mult = MatMult_SeqSBAIJ_4; 1490 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1491 B->ops->multtranspose = MatMult_SeqSBAIJ_4; 1492 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_4; 1493 break; 1494 case 5: 1495 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1496 B->ops->solve = MatSolve_SeqSBAIJ_5; 1497 B->ops->solvetranspose = MatSolve_SeqSBAIJ_5; 1498 B->ops->mult = MatMult_SeqSBAIJ_5; 1499 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1500 B->ops->multtranspose = MatMult_SeqSBAIJ_5; 1501 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_5; 1502 break; 1503 case 6: 1504 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1505 B->ops->solve = MatSolve_SeqSBAIJ_6; 1506 B->ops->solvetranspose = MatSolve_SeqSBAIJ_6; 1507 B->ops->mult = MatMult_SeqSBAIJ_6; 1508 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1509 B->ops->multtranspose = MatMult_SeqSBAIJ_6; 1510 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_6; 1511 break; 1512 case 7: 1513 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1514 B->ops->solve = MatSolve_SeqSBAIJ_7; 1515 B->ops->solvetranspose = MatSolve_SeqSBAIJ_7; 1516 B->ops->mult = MatMult_SeqSBAIJ_7; 1517 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1518 B->ops->multtranspose = MatMult_SeqSBAIJ_7; 1519 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_7; 1520 break; 1521 } 1522 } 1523 1524 b->mbs = mbs; 1525 b->nbs = mbs; 1526 if (!skipallocation) { 1527 /* b->ilen will count nonzeros in each block row so far. */ 1528 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 1529 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1530 if (!nnz) { 1531 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1532 else if (nz <= 0) nz = 1; 1533 for (i=0; i<mbs; i++) { 1534 b->imax[i] = nz; 1535 } 1536 nz = nz*mbs; /* total nz */ 1537 } else { 1538 nz = 0; 1539 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1540 } 1541 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1542 1543 /* allocate the matrix space */ 1544 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->m+1,PetscInt,&b->i);CHKERRQ(ierr); 1545 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1546 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1547 b->singlemalloc = PETSC_TRUE; 1548 1549 /* pointer to beginning of each row */ 1550 b->i[0] = 0; 1551 for (i=1; i<mbs+1; i++) { 1552 b->i[i] = b->i[i-1] + b->imax[i-1]; 1553 } 1554 } 1555 1556 B->bs = bs; 1557 b->bs2 = bs2; 1558 b->nz = 0; 1559 b->maxnz = nz*bs2; 1560 1561 b->inew = 0; 1562 b->jnew = 0; 1563 b->anew = 0; 1564 b->a2anew = 0; 1565 b->permute = PETSC_FALSE; 1566 PetscFunctionReturn(0); 1567 } 1568 EXTERN_C_END 1569 1570 EXTERN_C_BEGIN 1571 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 1572 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqBAIJ(Mat, MatType,MatReuse,Mat*); 1573 EXTERN_C_END 1574 1575 /*MC 1576 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1577 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1578 1579 Options Database Keys: 1580 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1581 1582 Level: beginner 1583 1584 .seealso: MatCreateSeqSBAIJ 1585 M*/ 1586 1587 EXTERN_C_BEGIN 1588 #undef __FUNCT__ 1589 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1590 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqSBAIJ(Mat B) 1591 { 1592 Mat_SeqSBAIJ *b; 1593 PetscErrorCode ierr; 1594 PetscMPIInt size; 1595 PetscTruth flg; 1596 1597 PetscFunctionBegin; 1598 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1599 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1600 B->m = B->M = PetscMax(B->m,B->M); 1601 B->n = B->N = PetscMax(B->n,B->N); 1602 1603 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1604 B->data = (void*)b; 1605 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1606 B->ops->destroy = MatDestroy_SeqSBAIJ; 1607 B->ops->view = MatView_SeqSBAIJ; 1608 B->factor = 0; 1609 B->mapping = 0; 1610 b->row = 0; 1611 b->icol = 0; 1612 b->reallocs = 0; 1613 b->saved_values = 0; 1614 1615 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1616 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1617 1618 b->sorted = PETSC_FALSE; 1619 b->roworiented = PETSC_TRUE; 1620 b->nonew = 0; 1621 b->diag = 0; 1622 b->solve_work = 0; 1623 b->mult_work = 0; 1624 B->spptr = 0; 1625 b->keepzeroedrows = PETSC_FALSE; 1626 b->xtoy = 0; 1627 b->XtoY = 0; 1628 1629 b->inew = 0; 1630 b->jnew = 0; 1631 b->anew = 0; 1632 b->a2anew = 0; 1633 b->permute = PETSC_FALSE; 1634 1635 b->ignore_ltriangular = PETSC_FALSE; 1636 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_ignore_lower_triangular",&flg);CHKERRQ(ierr); 1637 if (flg) b->ignore_ltriangular = PETSC_TRUE; 1638 1639 b->getrow_utriangular = PETSC_FALSE; 1640 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_getrow_uppertriangular",&flg);CHKERRQ(ierr); 1641 if (flg) b->getrow_utriangular = PETSC_TRUE; 1642 1643 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1644 "MatStoreValues_SeqSBAIJ", 1645 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1646 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1647 "MatRetrieveValues_SeqSBAIJ", 1648 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1649 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1650 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1651 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1652 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1653 "MatConvert_SeqSBAIJ_SeqAIJ", 1654 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1655 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 1656 "MatConvert_SeqSBAIJ_SeqBAIJ", 1657 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 1658 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1659 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1660 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1661 1662 B->symmetric = PETSC_TRUE; 1663 B->structurally_symmetric = PETSC_TRUE; 1664 B->symmetric_set = PETSC_TRUE; 1665 B->structurally_symmetric_set = PETSC_TRUE; 1666 PetscFunctionReturn(0); 1667 } 1668 EXTERN_C_END 1669 1670 #undef __FUNCT__ 1671 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1672 /*@C 1673 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1674 compressed row) format. For good matrix assembly performance the 1675 user should preallocate the matrix storage by setting the parameter nz 1676 (or the array nnz). By setting these parameters accurately, performance 1677 during matrix assembly can be increased by more than a factor of 50. 1678 1679 Collective on Mat 1680 1681 Input Parameters: 1682 + A - the symmetric matrix 1683 . bs - size of block 1684 . nz - number of block nonzeros per block row (same for all rows) 1685 - nnz - array containing the number of block nonzeros in the upper triangular plus 1686 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1687 1688 Options Database Keys: 1689 . -mat_no_unroll - uses code that does not unroll the loops in the 1690 block calculations (much slower) 1691 . -mat_block_size - size of the blocks to use 1692 1693 Level: intermediate 1694 1695 Notes: 1696 Specify the preallocated storage with either nz or nnz (not both). 1697 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1698 allocation. For additional details, see the users manual chapter on 1699 matrices. 1700 1701 If the nnz parameter is given then the nz parameter is ignored 1702 1703 1704 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1705 @*/ 1706 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 1707 { 1708 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 1709 1710 PetscFunctionBegin; 1711 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1712 if (f) { 1713 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1714 } 1715 PetscFunctionReturn(0); 1716 } 1717 1718 #undef __FUNCT__ 1719 #define __FUNCT__ "MatCreateSeqSBAIJ" 1720 /*@C 1721 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1722 compressed row) format. For good matrix assembly performance the 1723 user should preallocate the matrix storage by setting the parameter nz 1724 (or the array nnz). By setting these parameters accurately, performance 1725 during matrix assembly can be increased by more than a factor of 50. 1726 1727 Collective on MPI_Comm 1728 1729 Input Parameters: 1730 + comm - MPI communicator, set to PETSC_COMM_SELF 1731 . bs - size of block 1732 . m - number of rows, or number of columns 1733 . nz - number of block nonzeros per block row (same for all rows) 1734 - nnz - array containing the number of block nonzeros in the upper triangular plus 1735 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1736 1737 Output Parameter: 1738 . A - the symmetric matrix 1739 1740 Options Database Keys: 1741 . -mat_no_unroll - uses code that does not unroll the loops in the 1742 block calculations (much slower) 1743 . -mat_block_size - size of the blocks to use 1744 1745 Level: intermediate 1746 1747 Notes: 1748 1749 Specify the preallocated storage with either nz or nnz (not both). 1750 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1751 allocation. For additional details, see the users manual chapter on 1752 matrices. 1753 1754 If the nnz parameter is given then the nz parameter is ignored 1755 1756 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1757 @*/ 1758 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1759 { 1760 PetscErrorCode ierr; 1761 1762 PetscFunctionBegin; 1763 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1764 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 1765 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1766 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 1767 PetscFunctionReturn(0); 1768 } 1769 1770 #undef __FUNCT__ 1771 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1772 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1773 { 1774 Mat C; 1775 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1776 PetscErrorCode ierr; 1777 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1778 1779 PetscFunctionBegin; 1780 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1781 1782 *B = 0; 1783 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 1784 ierr = MatSetSizes(C,A->m,A->n,A->m,A->n);CHKERRQ(ierr); 1785 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1786 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1787 c = (Mat_SeqSBAIJ*)C->data; 1788 1789 C->preallocated = PETSC_TRUE; 1790 C->factor = A->factor; 1791 c->row = 0; 1792 c->icol = 0; 1793 c->saved_values = 0; 1794 c->keepzeroedrows = a->keepzeroedrows; 1795 C->assembled = PETSC_TRUE; 1796 1797 C->M = A->M; 1798 C->N = A->N; 1799 C->bs = A->bs; 1800 c->bs2 = a->bs2; 1801 c->mbs = a->mbs; 1802 c->nbs = a->nbs; 1803 1804 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr); 1805 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr); 1806 for (i=0; i<mbs; i++) { 1807 c->imax[i] = a->imax[i]; 1808 c->ilen[i] = a->ilen[i]; 1809 } 1810 ierr = PetscLogObjectMemory(C,2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1811 1812 /* allocate the matrix space */ 1813 ierr = PetscMalloc3(bs2*nz,MatScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 1814 c->singlemalloc = PETSC_TRUE; 1815 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1816 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)));CHKERRQ(ierr); 1817 if (mbs > 0) { 1818 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1819 if (cpvalues == MAT_COPY_VALUES) { 1820 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1821 } else { 1822 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1823 } 1824 } 1825 1826 c->sorted = a->sorted; 1827 c->roworiented = a->roworiented; 1828 c->nonew = a->nonew; 1829 1830 if (a->diag) { 1831 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 1832 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1833 for (i=0; i<mbs; i++) { 1834 c->diag[i] = a->diag[i]; 1835 } 1836 } else c->diag = 0; 1837 c->nz = a->nz; 1838 c->maxnz = a->maxnz; 1839 c->solve_work = 0; 1840 c->mult_work = 0; 1841 *B = C; 1842 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1843 PetscFunctionReturn(0); 1844 } 1845 1846 #undef __FUNCT__ 1847 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1848 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer, MatType type,Mat *A) 1849 { 1850 Mat_SeqSBAIJ *a; 1851 Mat B; 1852 PetscErrorCode ierr; 1853 int fd; 1854 PetscMPIInt size; 1855 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 1856 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1857 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 1858 PetscInt *masked,nmask,tmp,bs2,ishift; 1859 PetscScalar *aa; 1860 MPI_Comm comm = ((PetscObject)viewer)->comm; 1861 1862 PetscFunctionBegin; 1863 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1864 bs2 = bs*bs; 1865 1866 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1867 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1868 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1869 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1870 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1871 M = header[1]; N = header[2]; nz = header[3]; 1872 1873 if (header[3] < 0) { 1874 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1875 } 1876 1877 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1878 1879 /* 1880 This code adds extra rows to make sure the number of rows is 1881 divisible by the blocksize 1882 */ 1883 mbs = M/bs; 1884 extra_rows = bs - M + bs*(mbs); 1885 if (extra_rows == bs) extra_rows = 0; 1886 else mbs++; 1887 if (extra_rows) { 1888 ierr = PetscVerboseInfo((0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr); 1889 } 1890 1891 /* read in row lengths */ 1892 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1893 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1894 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1895 1896 /* read in column indices */ 1897 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1898 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1899 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1900 1901 /* loop over row lengths determining block row lengths */ 1902 ierr = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr); 1903 ierr = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1904 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 1905 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1906 masked = mask + mbs; 1907 rowcount = 0; nzcount = 0; 1908 for (i=0; i<mbs; i++) { 1909 nmask = 0; 1910 for (j=0; j<bs; j++) { 1911 kmax = rowlengths[rowcount]; 1912 for (k=0; k<kmax; k++) { 1913 tmp = jj[nzcount++]/bs; /* block col. index */ 1914 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1915 } 1916 rowcount++; 1917 } 1918 s_browlengths[i] += nmask; 1919 1920 /* zero out the mask elements we set */ 1921 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1922 } 1923 1924 /* create our matrix */ 1925 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 1926 ierr = MatSetSizes(B,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 1927 ierr = MatSetType(B,type);CHKERRQ(ierr); 1928 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,s_browlengths);CHKERRQ(ierr); 1929 a = (Mat_SeqSBAIJ*)B->data; 1930 1931 /* set matrix "i" values */ 1932 a->i[0] = 0; 1933 for (i=1; i<= mbs; i++) { 1934 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1935 a->ilen[i-1] = s_browlengths[i-1]; 1936 } 1937 a->nz = a->i[mbs]; 1938 1939 /* read in nonzero values */ 1940 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1941 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1942 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1943 1944 /* set "a" and "j" values into matrix */ 1945 nzcount = 0; jcount = 0; 1946 for (i=0; i<mbs; i++) { 1947 nzcountb = nzcount; 1948 nmask = 0; 1949 for (j=0; j<bs; j++) { 1950 kmax = rowlengths[i*bs+j]; 1951 for (k=0; k<kmax; k++) { 1952 tmp = jj[nzcount++]/bs; /* block col. index */ 1953 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1954 } 1955 } 1956 /* sort the masked values */ 1957 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1958 1959 /* set "j" values into matrix */ 1960 maskcount = 1; 1961 for (j=0; j<nmask; j++) { 1962 a->j[jcount++] = masked[j]; 1963 mask[masked[j]] = maskcount++; 1964 } 1965 1966 /* set "a" values into matrix */ 1967 ishift = bs2*a->i[i]; 1968 for (j=0; j<bs; j++) { 1969 kmax = rowlengths[i*bs+j]; 1970 for (k=0; k<kmax; k++) { 1971 tmp = jj[nzcountb]/bs ; /* block col. index */ 1972 if (tmp >= i){ 1973 block = mask[tmp] - 1; 1974 point = jj[nzcountb] - bs*tmp; 1975 idx = ishift + bs2*block + j + bs*point; 1976 a->a[idx] = aa[nzcountb]; 1977 } 1978 nzcountb++; 1979 } 1980 } 1981 /* zero out the mask elements we set */ 1982 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1983 } 1984 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1985 1986 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1987 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1988 ierr = PetscFree(aa);CHKERRQ(ierr); 1989 ierr = PetscFree(jj);CHKERRQ(ierr); 1990 ierr = PetscFree(mask);CHKERRQ(ierr); 1991 1992 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1993 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1994 ierr = MatView_Private(B);CHKERRQ(ierr); 1995 *A = B; 1996 PetscFunctionReturn(0); 1997 } 1998 1999 #undef __FUNCT__ 2000 #define __FUNCT__ "MatRelax_SeqSBAIJ" 2001 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2002 { 2003 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 2004 MatScalar *aa=a->a,*v,*v1; 2005 PetscScalar *x,*b,*t,sum,d; 2006 PetscErrorCode ierr; 2007 PetscInt m=a->mbs,bs=A->bs,*ai=a->i,*aj=a->j; 2008 PetscInt nz,nz1,*vj,*vj1,i; 2009 2010 PetscFunctionBegin; 2011 its = its*lits; 2012 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2013 2014 if (bs > 1) 2015 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2016 2017 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2018 if (xx != bb) { 2019 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2020 } else { 2021 b = x; 2022 } 2023 2024 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 2025 2026 if (flag & SOR_ZERO_INITIAL_GUESS) { 2027 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2028 for (i=0; i<m; i++) 2029 t[i] = b[i]; 2030 2031 for (i=0; i<m; i++){ 2032 d = *(aa + ai[i]); /* diag[i] */ 2033 v = aa + ai[i] + 1; 2034 vj = aj + ai[i] + 1; 2035 nz = ai[i+1] - ai[i] - 1; 2036 x[i] = omega*t[i]/d; 2037 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 2038 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2039 } 2040 } 2041 2042 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2043 for (i=0; i<m; i++) 2044 t[i] = b[i]; 2045 2046 for (i=0; i<m-1; i++){ /* update rhs */ 2047 v = aa + ai[i] + 1; 2048 vj = aj + ai[i] + 1; 2049 nz = ai[i+1] - ai[i] - 1; 2050 while (nz--) t[*vj++] -= x[i]*(*v++); 2051 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2052 } 2053 for (i=m-1; i>=0; i--){ 2054 d = *(aa + ai[i]); 2055 v = aa + ai[i] + 1; 2056 vj = aj + ai[i] + 1; 2057 nz = ai[i+1] - ai[i] - 1; 2058 sum = t[i]; 2059 while (nz--) sum -= x[*vj++]*(*v++); 2060 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2061 x[i] = (1-omega)*x[i] + omega*sum/d; 2062 } 2063 } 2064 its--; 2065 } 2066 2067 while (its--) { 2068 /* 2069 forward sweep: 2070 for i=0,...,m-1: 2071 sum[i] = (b[i] - U(i,:)x )/d[i]; 2072 x[i] = (1-omega)x[i] + omega*sum[i]; 2073 b = b - x[i]*U^T(i,:); 2074 2075 */ 2076 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2077 for (i=0; i<m; i++) 2078 t[i] = b[i]; 2079 2080 for (i=0; i<m; i++){ 2081 d = *(aa + ai[i]); /* diag[i] */ 2082 v = aa + ai[i] + 1; v1=v; 2083 vj = aj + ai[i] + 1; vj1=vj; 2084 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2085 sum = t[i]; 2086 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2087 x[i] = (1-omega)*x[i] + omega*sum/d; 2088 while (nz--) t[*vj++] -= x[i]*(*v++); 2089 ierr = PetscLogFlops(4*nz-2);CHKERRQ(ierr); 2090 } 2091 } 2092 2093 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2094 /* 2095 backward sweep: 2096 b = b - x[i]*U^T(i,:), i=0,...,n-2 2097 for i=m-1,...,0: 2098 sum[i] = (b[i] - U(i,:)x )/d[i]; 2099 x[i] = (1-omega)x[i] + omega*sum[i]; 2100 */ 2101 for (i=0; i<m; i++) 2102 t[i] = b[i]; 2103 2104 for (i=0; i<m-1; i++){ /* update rhs */ 2105 v = aa + ai[i] + 1; 2106 vj = aj + ai[i] + 1; 2107 nz = ai[i+1] - ai[i] - 1; 2108 while (nz--) t[*vj++] -= x[i]*(*v++); 2109 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2110 } 2111 for (i=m-1; i>=0; i--){ 2112 d = *(aa + ai[i]); 2113 v = aa + ai[i] + 1; 2114 vj = aj + ai[i] + 1; 2115 nz = ai[i+1] - ai[i] - 1; 2116 sum = t[i]; 2117 while (nz--) sum -= x[*vj++]*(*v++); 2118 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2119 x[i] = (1-omega)*x[i] + omega*sum/d; 2120 } 2121 } 2122 } 2123 2124 ierr = PetscFree(t);CHKERRQ(ierr); 2125 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2126 if (bb != xx) { 2127 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2128 } 2129 PetscFunctionReturn(0); 2130 } 2131 2132 2133 2134 2135 2136 2137