1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/inline/spops.h" 9 #include "src/mat/impls/sbaij/seq/sbaij.h" 10 11 #define CHUNKSIZE 10 12 13 /* 14 Checks for missing diagonals 15 */ 16 #undef __FUNCT__ 17 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 18 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A) 19 { 20 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 21 PetscErrorCode ierr; 22 PetscInt *diag,*jj = a->j,i; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for (i=0; i<a->mbs; i++) { 28 if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i); 29 } 30 PetscFunctionReturn(0); 31 } 32 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 35 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A) 36 { 37 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 38 PetscErrorCode ierr; 39 PetscInt i; 40 41 PetscFunctionBegin; 42 if (!a->diag) { 43 ierr = PetscMalloc(a->mbs*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 44 } 45 for (i=0; i<a->mbs; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 #undef __FUNCT__ 50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 51 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 52 { 53 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 54 PetscInt n = a->mbs,i; 55 56 PetscFunctionBegin; 57 *nn = n; 58 if (!ia) PetscFunctionReturn(0); 59 60 if (oshift == 1) { 61 /* temporarily add 1 to i and j indices */ 62 PetscInt nz = a->i[n]; 63 for (i=0; i<nz; i++) a->j[i]++; 64 for (i=0; i<n+1; i++) a->i[i]++; 65 *ia = a->i; *ja = a->j; 66 } else { 67 *ia = a->i; *ja = a->j; 68 } 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 74 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 75 { 76 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 77 PetscInt i,n = a->mbs; 78 79 PetscFunctionBegin; 80 if (!ia) PetscFunctionReturn(0); 81 82 if (oshift == 1) { 83 PetscInt nz = a->i[n]-1; 84 for (i=0; i<nz; i++) a->j[i]--; 85 for (i=0; i<n+1; i++) a->i[i]--; 86 } 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 92 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A) 93 { 94 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 95 PetscErrorCode ierr; 96 97 PetscFunctionBegin; 98 #if defined(PETSC_USE_LOG) 99 PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->rmap.N,a->nz); 100 #endif 101 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 102 if (a->row) {ierr = ISDestroy(a->row);CHKERRQ(ierr);} 103 if (a->col){ierr = ISDestroy(a->col);CHKERRQ(ierr);} 104 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 105 ierr = PetscFree(a->diag);CHKERRQ(ierr); 106 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 107 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 108 ierr = PetscFree(a->relax_work);CHKERRQ(ierr); 109 ierr = PetscFree(a->solves_work);CHKERRQ(ierr); 110 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 111 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 112 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 113 114 ierr = PetscFree(a->inew);CHKERRQ(ierr); 115 ierr = PetscFree(a);CHKERRQ(ierr); 116 117 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 118 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 119 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 120 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 121 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 122 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr); 123 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 124 PetscFunctionReturn(0); 125 } 126 127 #undef __FUNCT__ 128 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 129 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op) 130 { 131 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 132 PetscErrorCode ierr; 133 134 PetscFunctionBegin; 135 switch (op) { 136 case MAT_ROW_ORIENTED: 137 a->roworiented = PETSC_TRUE; 138 break; 139 case MAT_COLUMN_ORIENTED: 140 a->roworiented = PETSC_FALSE; 141 break; 142 case MAT_COLUMNS_SORTED: 143 a->sorted = PETSC_TRUE; 144 break; 145 case MAT_COLUMNS_UNSORTED: 146 a->sorted = PETSC_FALSE; 147 break; 148 case MAT_KEEP_ZEROED_ROWS: 149 a->keepzeroedrows = PETSC_TRUE; 150 break; 151 case MAT_NO_NEW_NONZERO_LOCATIONS: 152 a->nonew = 1; 153 break; 154 case MAT_NEW_NONZERO_LOCATION_ERR: 155 a->nonew = -1; 156 break; 157 case MAT_NEW_NONZERO_ALLOCATION_ERR: 158 a->nonew = -2; 159 break; 160 case MAT_YES_NEW_NONZERO_LOCATIONS: 161 a->nonew = 0; 162 break; 163 case MAT_ROWS_SORTED: 164 case MAT_ROWS_UNSORTED: 165 case MAT_YES_NEW_DIAGONALS: 166 case MAT_IGNORE_OFF_PROC_ENTRIES: 167 case MAT_USE_HASH_TABLE: 168 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 169 break; 170 case MAT_NO_NEW_DIAGONALS: 171 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 172 case MAT_NOT_SYMMETRIC: 173 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 174 case MAT_HERMITIAN: 175 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 176 case MAT_SYMMETRIC: 177 case MAT_STRUCTURALLY_SYMMETRIC: 178 case MAT_NOT_HERMITIAN: 179 case MAT_SYMMETRY_ETERNAL: 180 case MAT_NOT_SYMMETRY_ETERNAL: 181 ierr = PetscInfo1(A,"Option %s not relevent\n",MatOptions[op]);CHKERRQ(ierr); 182 break; 183 case MAT_IGNORE_LOWER_TRIANGULAR: 184 a->ignore_ltriangular = PETSC_TRUE; 185 break; 186 case MAT_ERROR_LOWER_TRIANGULAR: 187 a->ignore_ltriangular = PETSC_FALSE; 188 break; 189 case MAT_GETROW_UPPERTRIANGULAR: 190 a->getrow_utriangular = PETSC_TRUE; 191 break; 192 default: 193 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 194 } 195 PetscFunctionReturn(0); 196 } 197 198 #undef __FUNCT__ 199 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 200 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v) 201 { 202 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 203 PetscErrorCode ierr; 204 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 205 MatScalar *aa,*aa_i; 206 PetscScalar *v_i; 207 208 PetscFunctionBegin; 209 if (A && !a->getrow_utriangular) SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format. Getting the upper triangular part of row, run with -mat_getrow_uppertriangular, call MatSetOption(mat,MAT_GETROW_UPPERTRIANGULAR) or MatGetRowUpperTriangular()"); 210 /* Get the upper triangular part of the row */ 211 bs = A->rmap.bs; 212 ai = a->i; 213 aj = a->j; 214 aa = a->a; 215 bs2 = a->bs2; 216 217 if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row); 218 219 bn = row/bs; /* Block number */ 220 bp = row % bs; /* Block position */ 221 M = ai[bn+1] - ai[bn]; 222 *ncols = bs*M; 223 224 if (v) { 225 *v = 0; 226 if (*ncols) { 227 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 228 for (i=0; i<M; i++) { /* for each block in the block row */ 229 v_i = *v + i*bs; 230 aa_i = aa + bs2*(ai[bn] + i); 231 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 232 } 233 } 234 } 235 236 if (cols) { 237 *cols = 0; 238 if (*ncols) { 239 ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr); 240 for (i=0; i<M; i++) { /* for each block in the block row */ 241 cols_i = *cols + i*bs; 242 itmp = bs*aj[ai[bn] + i]; 243 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 244 } 245 } 246 } 247 248 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 249 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 250 #ifdef column_search 251 v_i = *v + M*bs; 252 cols_i = *cols + M*bs; 253 for (i=0; i<bn; i++){ /* for each block row */ 254 M = ai[i+1] - ai[i]; 255 for (j=0; j<M; j++){ 256 itmp = aj[ai[i] + j]; /* block column value */ 257 if (itmp == bn){ 258 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 259 for (k=0; k<bs; k++) { 260 *cols_i++ = i*bs+k; 261 *v_i++ = aa_i[k]; 262 } 263 *ncols += bs; 264 break; 265 } 266 } 267 } 268 #endif 269 PetscFunctionReturn(0); 270 } 271 272 #undef __FUNCT__ 273 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 274 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 275 { 276 PetscErrorCode ierr; 277 278 PetscFunctionBegin; 279 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 280 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 281 PetscFunctionReturn(0); 282 } 283 284 #undef __FUNCT__ 285 #define __FUNCT__ "MatGetRowUpperTriangular_SeqSBAIJ" 286 PetscErrorCode MatGetRowUpperTriangular_SeqSBAIJ(Mat A) 287 { 288 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 289 290 PetscFunctionBegin; 291 a->getrow_utriangular = PETSC_TRUE; 292 PetscFunctionReturn(0); 293 } 294 #undef __FUNCT__ 295 #define __FUNCT__ "MatRestoreRowUpperTriangular_SeqSBAIJ" 296 PetscErrorCode MatRestoreRowUpperTriangular_SeqSBAIJ(Mat A) 297 { 298 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 299 300 PetscFunctionBegin; 301 a->getrow_utriangular = PETSC_FALSE; 302 PetscFunctionReturn(0); 303 } 304 305 #undef __FUNCT__ 306 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 307 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B) 308 { 309 PetscErrorCode ierr; 310 PetscFunctionBegin; 311 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 312 PetscFunctionReturn(0); 313 } 314 315 #undef __FUNCT__ 316 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 317 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 318 { 319 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 320 PetscErrorCode ierr; 321 PetscInt i,j,bs = A->rmap.bs,k,l,bs2=a->bs2; 322 const char *name; 323 PetscViewerFormat format; 324 325 PetscFunctionBegin; 326 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 327 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 328 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 329 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 330 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 331 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 332 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 333 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 334 for (i=0; i<a->mbs; i++) { 335 for (j=0; j<bs; j++) { 336 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 337 for (k=a->i[i]; k<a->i[i+1]; k++) { 338 for (l=0; l<bs; l++) { 339 #if defined(PETSC_USE_COMPLEX) 340 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 341 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 342 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 343 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 344 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 345 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 346 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 347 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 348 } 349 #else 350 if (a->a[bs2*k + l*bs + j] != 0.0) { 351 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 352 } 353 #endif 354 } 355 } 356 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 357 } 358 } 359 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 360 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 361 PetscFunctionReturn(0); 362 } else { 363 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 364 for (i=0; i<a->mbs; i++) { 365 for (j=0; j<bs; j++) { 366 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 367 for (k=a->i[i]; k<a->i[i+1]; k++) { 368 for (l=0; l<bs; l++) { 369 #if defined(PETSC_USE_COMPLEX) 370 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 371 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 372 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 373 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 374 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 375 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 376 } else { 377 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 378 } 379 #else 380 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 381 #endif 382 } 383 } 384 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 385 } 386 } 387 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 388 } 389 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 390 PetscFunctionReturn(0); 391 } 392 393 #undef __FUNCT__ 394 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 395 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 396 { 397 Mat A = (Mat) Aa; 398 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 399 PetscErrorCode ierr; 400 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2; 401 PetscMPIInt rank; 402 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 403 MatScalar *aa; 404 MPI_Comm comm; 405 PetscViewer viewer; 406 407 PetscFunctionBegin; 408 /* 409 This is nasty. If this is called from an originally parallel matrix 410 then all processes call this,but only the first has the matrix so the 411 rest should return immediately. 412 */ 413 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 414 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 415 if (rank) PetscFunctionReturn(0); 416 417 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 418 419 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 420 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 421 422 /* loop over matrix elements drawing boxes */ 423 color = PETSC_DRAW_BLUE; 424 for (i=0,row=0; i<mbs; i++,row+=bs) { 425 for (j=a->i[i]; j<a->i[i+1]; j++) { 426 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 427 x_l = a->j[j]*bs; x_r = x_l + 1.0; 428 aa = a->a + j*bs2; 429 for (k=0; k<bs; k++) { 430 for (l=0; l<bs; l++) { 431 if (PetscRealPart(*aa++) >= 0.) continue; 432 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 433 } 434 } 435 } 436 } 437 color = PETSC_DRAW_CYAN; 438 for (i=0,row=0; i<mbs; i++,row+=bs) { 439 for (j=a->i[i]; j<a->i[i+1]; j++) { 440 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 441 x_l = a->j[j]*bs; x_r = x_l + 1.0; 442 aa = a->a + j*bs2; 443 for (k=0; k<bs; k++) { 444 for (l=0; l<bs; l++) { 445 if (PetscRealPart(*aa++) != 0.) continue; 446 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 447 } 448 } 449 } 450 } 451 452 color = PETSC_DRAW_RED; 453 for (i=0,row=0; i<mbs; i++,row+=bs) { 454 for (j=a->i[i]; j<a->i[i+1]; j++) { 455 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 456 x_l = a->j[j]*bs; x_r = x_l + 1.0; 457 aa = a->a + j*bs2; 458 for (k=0; k<bs; k++) { 459 for (l=0; l<bs; l++) { 460 if (PetscRealPart(*aa++) <= 0.) continue; 461 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 462 } 463 } 464 } 465 } 466 PetscFunctionReturn(0); 467 } 468 469 #undef __FUNCT__ 470 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 471 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 472 { 473 PetscErrorCode ierr; 474 PetscReal xl,yl,xr,yr,w,h; 475 PetscDraw draw; 476 PetscTruth isnull; 477 478 PetscFunctionBegin; 479 480 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 481 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 482 483 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 484 xr = A->rmap.N; yr = A->rmap.N; h = yr/10.0; w = xr/10.0; 485 xr += w; yr += h; xl = -w; yl = -h; 486 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 487 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 488 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 489 PetscFunctionReturn(0); 490 } 491 492 #undef __FUNCT__ 493 #define __FUNCT__ "MatView_SeqSBAIJ" 494 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 495 { 496 PetscErrorCode ierr; 497 PetscTruth iascii,isdraw; 498 499 PetscFunctionBegin; 500 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 501 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 502 if (iascii){ 503 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 504 } else if (isdraw) { 505 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 506 } else { 507 Mat B; 508 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 509 ierr = MatView(B,viewer);CHKERRQ(ierr); 510 ierr = MatDestroy(B);CHKERRQ(ierr); 511 } 512 PetscFunctionReturn(0); 513 } 514 515 516 #undef __FUNCT__ 517 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 518 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 519 { 520 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 521 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 522 PetscInt *ai = a->i,*ailen = a->ilen; 523 PetscInt brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2; 524 MatScalar *ap,*aa = a->a,zero = 0.0; 525 526 PetscFunctionBegin; 527 for (k=0; k<m; k++) { /* loop over rows */ 528 row = im[k]; brow = row/bs; 529 if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); 530 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 531 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 532 nrow = ailen[brow]; 533 for (l=0; l<n; l++) { /* loop over columns */ 534 if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); 535 if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1); 536 col = in[l] ; 537 bcol = col/bs; 538 cidx = col%bs; 539 ridx = row%bs; 540 high = nrow; 541 low = 0; /* assume unsorted */ 542 while (high-low > 5) { 543 t = (low+high)/2; 544 if (rp[t] > bcol) high = t; 545 else low = t; 546 } 547 for (i=low; i<high; i++) { 548 if (rp[i] > bcol) break; 549 if (rp[i] == bcol) { 550 *v++ = ap[bs2*i+bs*cidx+ridx]; 551 goto finished; 552 } 553 } 554 *v++ = zero; 555 finished:; 556 } 557 } 558 PetscFunctionReturn(0); 559 } 560 561 562 #undef __FUNCT__ 563 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 564 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 565 { 566 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 567 PetscErrorCode ierr; 568 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 569 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 570 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval; 571 PetscTruth roworiented=a->roworiented; 572 const MatScalar *value = v; 573 MatScalar *ap,*aa = a->a,*bap; 574 575 PetscFunctionBegin; 576 if (roworiented) { 577 stepval = (n-1)*bs; 578 } else { 579 stepval = (m-1)*bs; 580 } 581 for (k=0; k<m; k++) { /* loop over added rows */ 582 row = im[k]; 583 if (row < 0) continue; 584 #if defined(PETSC_USE_DEBUG) 585 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 586 #endif 587 rp = aj + ai[row]; 588 ap = aa + bs2*ai[row]; 589 rmax = imax[row]; 590 nrow = ailen[row]; 591 low = 0; 592 high = nrow; 593 for (l=0; l<n; l++) { /* loop over added columns */ 594 if (in[l] < 0) continue; 595 col = in[l]; 596 #if defined(PETSC_USE_DEBUG) 597 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1); 598 #endif 599 if (col < row) continue; /* ignore lower triangular block */ 600 if (roworiented) { 601 value = v + k*(stepval+bs)*bs + l*bs; 602 } else { 603 value = v + l*(stepval+bs)*bs + k*bs; 604 } 605 if (col <= lastcol) low = 0; else high = nrow; 606 lastcol = col; 607 while (high-low > 7) { 608 t = (low+high)/2; 609 if (rp[t] > col) high = t; 610 else low = t; 611 } 612 for (i=low; i<high; i++) { 613 if (rp[i] > col) break; 614 if (rp[i] == col) { 615 bap = ap + bs2*i; 616 if (roworiented) { 617 if (is == ADD_VALUES) { 618 for (ii=0; ii<bs; ii++,value+=stepval) { 619 for (jj=ii; jj<bs2; jj+=bs) { 620 bap[jj] += *value++; 621 } 622 } 623 } else { 624 for (ii=0; ii<bs; ii++,value+=stepval) { 625 for (jj=ii; jj<bs2; jj+=bs) { 626 bap[jj] = *value++; 627 } 628 } 629 } 630 } else { 631 if (is == ADD_VALUES) { 632 for (ii=0; ii<bs; ii++,value+=stepval) { 633 for (jj=0; jj<bs; jj++) { 634 *bap++ += *value++; 635 } 636 } 637 } else { 638 for (ii=0; ii<bs; ii++,value+=stepval) { 639 for (jj=0; jj<bs; jj++) { 640 *bap++ = *value++; 641 } 642 } 643 } 644 } 645 goto noinsert2; 646 } 647 } 648 if (nonew == 1) goto noinsert2; 649 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 650 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 651 N = nrow++ - 1; high++; 652 /* shift up all the later entries in this row */ 653 for (ii=N; ii>=i; ii--) { 654 rp[ii+1] = rp[ii]; 655 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 656 } 657 if (N >= i) { 658 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 659 } 660 rp[i] = col; 661 bap = ap + bs2*i; 662 if (roworiented) { 663 for (ii=0; ii<bs; ii++,value+=stepval) { 664 for (jj=ii; jj<bs2; jj+=bs) { 665 bap[jj] = *value++; 666 } 667 } 668 } else { 669 for (ii=0; ii<bs; ii++,value+=stepval) { 670 for (jj=0; jj<bs; jj++) { 671 *bap++ = *value++; 672 } 673 } 674 } 675 noinsert2:; 676 low = i; 677 } 678 ailen[row] = nrow; 679 } 680 PetscFunctionReturn(0); 681 } 682 683 #undef __FUNCT__ 684 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 685 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 686 { 687 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 688 PetscErrorCode ierr; 689 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 690 PetscInt m = A->rmap.N,*ip,N,*ailen = a->ilen; 691 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 692 MatScalar *aa = a->a,*ap; 693 694 PetscFunctionBegin; 695 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 696 697 if (m) rmax = ailen[0]; 698 for (i=1; i<mbs; i++) { 699 /* move each row back by the amount of empty slots (fshift) before it*/ 700 fshift += imax[i-1] - ailen[i-1]; 701 rmax = PetscMax(rmax,ailen[i]); 702 if (fshift) { 703 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 704 N = ailen[i]; 705 for (j=0; j<N; j++) { 706 ip[j-fshift] = ip[j]; 707 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 708 } 709 } 710 ai[i] = ai[i-1] + ailen[i-1]; 711 } 712 if (mbs) { 713 fshift += imax[mbs-1] - ailen[mbs-1]; 714 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 715 } 716 /* reset ilen and imax for each row */ 717 for (i=0; i<mbs; i++) { 718 ailen[i] = imax[i] = ai[i+1] - ai[i]; 719 } 720 a->nz = ai[mbs]; 721 722 /* diagonals may have moved, reset it */ 723 if (a->diag) { 724 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 725 } 726 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->rmap.N,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 727 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 728 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 729 a->reallocs = 0; 730 A->info.nz_unneeded = (PetscReal)fshift*bs2; 731 PetscFunctionReturn(0); 732 } 733 734 /* 735 This function returns an array of flags which indicate the locations of contiguous 736 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 737 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 738 Assume: sizes should be long enough to hold all the values. 739 */ 740 #undef __FUNCT__ 741 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 742 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 743 { 744 PetscInt i,j,k,row; 745 PetscTruth flg; 746 747 PetscFunctionBegin; 748 for (i=0,j=0; i<n; j++) { 749 row = idx[i]; 750 if (row%bs!=0) { /* Not the begining of a block */ 751 sizes[j] = 1; 752 i++; 753 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 754 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 755 i++; 756 } else { /* Begining of the block, so check if the complete block exists */ 757 flg = PETSC_TRUE; 758 for (k=1; k<bs; k++) { 759 if (row+k != idx[i+k]) { /* break in the block */ 760 flg = PETSC_FALSE; 761 break; 762 } 763 } 764 if (flg) { /* No break in the bs */ 765 sizes[j] = bs; 766 i+= bs; 767 } else { 768 sizes[j] = 1; 769 i++; 770 } 771 } 772 } 773 *bs_max = j; 774 PetscFunctionReturn(0); 775 } 776 777 778 /* Only add/insert a(i,j) with i<=j (blocks). 779 Any a(i,j) with i>j input by user is ingored. 780 */ 781 782 #undef __FUNCT__ 783 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 784 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 785 { 786 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 787 PetscErrorCode ierr; 788 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 789 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 790 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol; 791 PetscInt ridx,cidx,bs2=a->bs2; 792 MatScalar *ap,value,*aa=a->a,*bap; 793 794 PetscFunctionBegin; 795 for (k=0; k<m; k++) { /* loop over added rows */ 796 row = im[k]; /* row number */ 797 brow = row/bs; /* block row number */ 798 if (row < 0) continue; 799 #if defined(PETSC_USE_DEBUG) 800 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 801 #endif 802 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 803 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 804 rmax = imax[brow]; /* maximum space allocated for this row */ 805 nrow = ailen[brow]; /* actual length of this row */ 806 low = 0; 807 808 for (l=0; l<n; l++) { /* loop over added columns */ 809 if (in[l] < 0) continue; 810 #if defined(PETSC_USE_DEBUG) 811 if (in[l] >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->rmap.N-1); 812 #endif 813 col = in[l]; 814 bcol = col/bs; /* block col number */ 815 816 if (brow > bcol) { 817 if (a->ignore_ltriangular){ 818 continue; /* ignore lower triangular values */ 819 } else { 820 SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR)"); 821 } 822 } 823 824 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 825 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 826 /* element value a(k,l) */ 827 if (roworiented) { 828 value = v[l + k*n]; 829 } else { 830 value = v[k + l*m]; 831 } 832 833 /* move pointer bap to a(k,l) quickly and add/insert value */ 834 if (col <= lastcol) low = 0; high = nrow; 835 lastcol = col; 836 while (high-low > 7) { 837 t = (low+high)/2; 838 if (rp[t] > bcol) high = t; 839 else low = t; 840 } 841 for (i=low; i<high; i++) { 842 if (rp[i] > bcol) break; 843 if (rp[i] == bcol) { 844 bap = ap + bs2*i + bs*cidx + ridx; 845 if (is == ADD_VALUES) *bap += value; 846 else *bap = value; 847 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 848 if (brow == bcol && ridx < cidx){ 849 bap = ap + bs2*i + bs*ridx + cidx; 850 if (is == ADD_VALUES) *bap += value; 851 else *bap = value; 852 } 853 goto noinsert1; 854 } 855 } 856 857 if (nonew == 1) goto noinsert1; 858 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 859 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 860 861 N = nrow++ - 1; high++; 862 /* shift up all the later entries in this row */ 863 for (ii=N; ii>=i; ii--) { 864 rp[ii+1] = rp[ii]; 865 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 866 } 867 if (N>=i) { 868 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 869 } 870 rp[i] = bcol; 871 ap[bs2*i + bs*cidx + ridx] = value; 872 noinsert1:; 873 low = i; 874 } 875 } /* end of loop over added columns */ 876 ailen[brow] = nrow; 877 } /* end of loop over added rows */ 878 PetscFunctionReturn(0); 879 } 880 881 #undef __FUNCT__ 882 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 883 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 884 { 885 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 886 Mat outA; 887 PetscErrorCode ierr; 888 PetscTruth row_identity; 889 890 PetscFunctionBegin; 891 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels=0 is supported for in-place icc"); 892 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 893 if (!row_identity) SETERRQ(PETSC_ERR_SUP,"Matrix reordering is not supported"); 894 if (inA->rmap.bs != 1) SETERRQ1(PETSC_ERR_SUP,"Matrix block size %D is not supported",inA->rmap.bs); /* Need to replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR()! */ 895 896 outA = inA; 897 inA->factor = FACTOR_CHOLESKY; 898 899 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 900 /* 901 Blocksize < 8 have a special faster factorization/solver 902 for ICC(0) factorization with natural ordering 903 */ 904 switch (inA->rmap.bs){ /* Note: row_identity = PETSC_TRUE! */ 905 case 1: 906 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 907 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 908 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 909 inA->ops->solves = MatSolves_SeqSBAIJ_1; 910 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 911 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 912 ierr = PetscInfo(inA,"Using special in-place natural ordering solvetrans BS=1\n");CHKERRQ(ierr); 913 break; 914 case 2: 915 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 916 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 917 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 918 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering; 919 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering; 920 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=2\n");CHKERRQ(ierr); 921 break; 922 case 3: 923 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 924 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 925 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 926 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_3_NaturalOrdering; 927 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering; 928 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=3\n");CHKERRQ(ierr); 929 break; 930 case 4: 931 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 932 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 933 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 934 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_4_NaturalOrdering; 935 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering; 936 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=4\n");CHKERRQ(ierr); 937 break; 938 case 5: 939 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 940 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 941 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 942 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_5_NaturalOrdering; 943 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering; 944 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=5\n");CHKERRQ(ierr); 945 break; 946 case 6: 947 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 948 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 949 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 950 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_6_NaturalOrdering; 951 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering; 952 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=6\n");CHKERRQ(ierr); 953 break; 954 case 7: 955 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 956 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 957 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 958 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_7_NaturalOrdering; 959 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering; 960 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=7\n");CHKERRQ(ierr); 961 break; 962 default: 963 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 964 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering; 965 inA->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 966 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering; 967 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering; 968 break; 969 } 970 971 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 972 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 973 a->row = row; 974 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 975 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 976 a->col = row; 977 978 /* Create the invert permutation so that it can be used in MatCholeskyFactorNumeric() */ 979 if (a->icol) {ierr = ISInvertPermutation(row,PETSC_DECIDE, &a->icol);CHKERRQ(ierr);} 980 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 981 982 if (!a->solve_work) { 983 ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 984 ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr); 985 } 986 987 ierr = MatCholeskyFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 988 PetscFunctionReturn(0); 989 } 990 991 EXTERN_C_BEGIN 992 #undef __FUNCT__ 993 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 994 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices) 995 { 996 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 997 PetscInt i,nz,n; 998 999 PetscFunctionBegin; 1000 nz = baij->maxnz; 1001 n = mat->cmap.n; 1002 for (i=0; i<nz; i++) { 1003 baij->j[i] = indices[i]; 1004 } 1005 baij->nz = nz; 1006 for (i=0; i<n; i++) { 1007 baij->ilen[i] = baij->imax[i]; 1008 } 1009 PetscFunctionReturn(0); 1010 } 1011 EXTERN_C_END 1012 1013 #undef __FUNCT__ 1014 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1015 /*@ 1016 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1017 in the matrix. 1018 1019 Input Parameters: 1020 + mat - the SeqSBAIJ matrix 1021 - indices - the column indices 1022 1023 Level: advanced 1024 1025 Notes: 1026 This can be called if you have precomputed the nonzero structure of the 1027 matrix and want to provide it to the matrix object to improve the performance 1028 of the MatSetValues() operation. 1029 1030 You MUST have set the correct numbers of nonzeros per row in the call to 1031 MatCreateSeqSBAIJ(), and the columns indices MUST be sorted. 1032 1033 MUST be called before any calls to MatSetValues() 1034 1035 .seealso: MatCreateSeqSBAIJ 1036 @*/ 1037 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1038 { 1039 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1040 1041 PetscFunctionBegin; 1042 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1043 PetscValidPointer(indices,2); 1044 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1045 if (f) { 1046 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1047 } else { 1048 SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices"); 1049 } 1050 PetscFunctionReturn(0); 1051 } 1052 1053 #undef __FUNCT__ 1054 #define __FUNCT__ "MatCopy_SeqSBAIJ" 1055 PetscErrorCode MatCopy_SeqSBAIJ(Mat A,Mat B,MatStructure str) 1056 { 1057 PetscErrorCode ierr; 1058 1059 PetscFunctionBegin; 1060 /* If the two matrices have the same copy implementation, use fast copy. */ 1061 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1062 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1063 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1064 1065 if (a->i[A->rmap.N] != b->i[B->rmap.N]) { 1066 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1067 } 1068 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr); 1069 } else { 1070 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1071 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1072 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1073 } 1074 PetscFunctionReturn(0); 1075 } 1076 1077 #undef __FUNCT__ 1078 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1079 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A) 1080 { 1081 PetscErrorCode ierr; 1082 1083 PetscFunctionBegin; 1084 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 1085 PetscFunctionReturn(0); 1086 } 1087 1088 #undef __FUNCT__ 1089 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1090 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1091 { 1092 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1093 PetscFunctionBegin; 1094 *array = a->a; 1095 PetscFunctionReturn(0); 1096 } 1097 1098 #undef __FUNCT__ 1099 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1100 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1101 { 1102 PetscFunctionBegin; 1103 PetscFunctionReturn(0); 1104 } 1105 1106 #include "petscblaslapack.h" 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1109 PetscErrorCode MatAXPY_SeqSBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1110 { 1111 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1112 PetscErrorCode ierr; 1113 PetscInt i,bs=Y->rmap.bs,bs2,j; 1114 PetscBLASInt bnz = (PetscBLASInt)x->nz,one = 1; 1115 1116 PetscFunctionBegin; 1117 if (str == SAME_NONZERO_PATTERN) { 1118 PetscScalar alpha = a; 1119 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1120 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1121 if (y->xtoy && y->XtoY != X) { 1122 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1123 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1124 } 1125 if (!y->xtoy) { /* get xtoy */ 1126 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1127 y->XtoY = X; 1128 } 1129 bs2 = bs*bs; 1130 for (i=0; i<x->nz; i++) { 1131 j = 0; 1132 while (j < bs2){ 1133 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 1134 j++; 1135 } 1136 } 1137 ierr = PetscInfo3(0,"ratio of nnz_s(X)/nnz_s(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 1138 } else { 1139 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1140 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1141 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1142 } 1143 PetscFunctionReturn(0); 1144 } 1145 1146 #undef __FUNCT__ 1147 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1148 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1149 { 1150 PetscFunctionBegin; 1151 *flg = PETSC_TRUE; 1152 PetscFunctionReturn(0); 1153 } 1154 1155 #undef __FUNCT__ 1156 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1157 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1158 { 1159 PetscFunctionBegin; 1160 *flg = PETSC_TRUE; 1161 PetscFunctionReturn(0); 1162 } 1163 1164 #undef __FUNCT__ 1165 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1166 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg) 1167 { 1168 PetscFunctionBegin; 1169 *flg = PETSC_FALSE; 1170 PetscFunctionReturn(0); 1171 } 1172 1173 #undef __FUNCT__ 1174 #define __FUNCT__ "MatRealPart_SeqSBAIJ" 1175 PetscErrorCode MatRealPart_SeqSBAIJ(Mat A) 1176 { 1177 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1178 PetscInt i,nz = a->bs2*a->i[a->mbs]; 1179 PetscScalar *aa = a->a; 1180 1181 PetscFunctionBegin; 1182 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 1183 PetscFunctionReturn(0); 1184 } 1185 1186 #undef __FUNCT__ 1187 #define __FUNCT__ "MatImaginaryPart_SeqSBAIJ" 1188 PetscErrorCode MatImaginaryPart_SeqSBAIJ(Mat A) 1189 { 1190 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1191 PetscInt i,nz = a->bs2*a->i[a->mbs]; 1192 PetscScalar *aa = a->a; 1193 1194 PetscFunctionBegin; 1195 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 1196 PetscFunctionReturn(0); 1197 } 1198 1199 /* -------------------------------------------------------------------*/ 1200 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1201 MatGetRow_SeqSBAIJ, 1202 MatRestoreRow_SeqSBAIJ, 1203 MatMult_SeqSBAIJ_N, 1204 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1205 MatMult_SeqSBAIJ_N, /* transpose versions are same as non-transpose versions */ 1206 MatMultAdd_SeqSBAIJ_N, 1207 MatSolve_SeqSBAIJ_N, 1208 0, 1209 0, 1210 /*10*/ 0, 1211 0, 1212 MatCholeskyFactor_SeqSBAIJ, 1213 MatRelax_SeqSBAIJ, 1214 MatTranspose_SeqSBAIJ, 1215 /*15*/ MatGetInfo_SeqSBAIJ, 1216 MatEqual_SeqSBAIJ, 1217 MatGetDiagonal_SeqSBAIJ, 1218 MatDiagonalScale_SeqSBAIJ, 1219 MatNorm_SeqSBAIJ, 1220 /*20*/ 0, 1221 MatAssemblyEnd_SeqSBAIJ, 1222 0, 1223 MatSetOption_SeqSBAIJ, 1224 MatZeroEntries_SeqSBAIJ, 1225 /*25*/ 0, 1226 0, 1227 0, 1228 MatCholeskyFactorSymbolic_SeqSBAIJ, 1229 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1230 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1231 0, 1232 MatICCFactorSymbolic_SeqSBAIJ, 1233 MatGetArray_SeqSBAIJ, 1234 MatRestoreArray_SeqSBAIJ, 1235 /*35*/ MatDuplicate_SeqSBAIJ, 1236 MatForwardSolve_SeqSBAIJ_N, 1237 MatBackwardSolve_SeqSBAIJ_N, 1238 0, 1239 MatICCFactor_SeqSBAIJ, 1240 /*40*/ MatAXPY_SeqSBAIJ, 1241 MatGetSubMatrices_SeqSBAIJ, 1242 MatIncreaseOverlap_SeqSBAIJ, 1243 MatGetValues_SeqSBAIJ, 1244 MatCopy_SeqSBAIJ, 1245 /*45*/ 0, 1246 MatScale_SeqSBAIJ, 1247 0, 1248 0, 1249 0, 1250 /*50*/ 0, 1251 MatGetRowIJ_SeqSBAIJ, 1252 MatRestoreRowIJ_SeqSBAIJ, 1253 0, 1254 0, 1255 /*55*/ 0, 1256 0, 1257 0, 1258 0, 1259 MatSetValuesBlocked_SeqSBAIJ, 1260 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1261 0, 1262 0, 1263 0, 1264 0, 1265 /*65*/ 0, 1266 0, 1267 0, 1268 0, 1269 0, 1270 /*70*/ MatGetRowMaxAbs_SeqSBAIJ, 1271 0, 1272 0, 1273 0, 1274 0, 1275 /*75*/ 0, 1276 0, 1277 0, 1278 0, 1279 0, 1280 /*80*/ 0, 1281 0, 1282 0, 1283 #if !defined(PETSC_USE_COMPLEX) 1284 MatGetInertia_SeqSBAIJ, 1285 #else 1286 0, 1287 #endif 1288 MatLoad_SeqSBAIJ, 1289 /*85*/ MatIsSymmetric_SeqSBAIJ, 1290 MatIsHermitian_SeqSBAIJ, 1291 MatIsStructurallySymmetric_SeqSBAIJ, 1292 0, 1293 0, 1294 /*90*/ 0, 1295 0, 1296 0, 1297 0, 1298 0, 1299 /*95*/ 0, 1300 0, 1301 0, 1302 0, 1303 0, 1304 /*100*/0, 1305 0, 1306 0, 1307 0, 1308 0, 1309 /*105*/0, 1310 MatRealPart_SeqSBAIJ, 1311 MatImaginaryPart_SeqSBAIJ, 1312 MatGetRowUpperTriangular_SeqSBAIJ, 1313 MatRestoreRowUpperTriangular_SeqSBAIJ 1314 }; 1315 1316 EXTERN_C_BEGIN 1317 #undef __FUNCT__ 1318 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1319 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqSBAIJ(Mat mat) 1320 { 1321 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1322 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 1323 PetscErrorCode ierr; 1324 1325 PetscFunctionBegin; 1326 if (aij->nonew != 1) { 1327 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1328 } 1329 1330 /* allocate space for values if not already there */ 1331 if (!aij->saved_values) { 1332 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1333 } 1334 1335 /* copy values over */ 1336 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1337 PetscFunctionReturn(0); 1338 } 1339 EXTERN_C_END 1340 1341 EXTERN_C_BEGIN 1342 #undef __FUNCT__ 1343 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1344 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqSBAIJ(Mat mat) 1345 { 1346 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1347 PetscErrorCode ierr; 1348 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 1349 1350 PetscFunctionBegin; 1351 if (aij->nonew != 1) { 1352 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1353 } 1354 if (!aij->saved_values) { 1355 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1356 } 1357 1358 /* copy values over */ 1359 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1360 PetscFunctionReturn(0); 1361 } 1362 EXTERN_C_END 1363 1364 EXTERN_C_BEGIN 1365 #undef __FUNCT__ 1366 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1367 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 1368 { 1369 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1370 PetscErrorCode ierr; 1371 PetscInt i,mbs,bs2; 1372 PetscTruth skipallocation = PETSC_FALSE,flg; 1373 1374 PetscFunctionBegin; 1375 B->preallocated = PETSC_TRUE; 1376 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1377 B->rmap.bs = B->cmap.bs = bs; 1378 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 1379 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 1380 1381 mbs = B->rmap.N/bs; 1382 bs2 = bs*bs; 1383 1384 if (mbs*bs != B->rmap.N) { 1385 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1386 } 1387 1388 if (nz == MAT_SKIP_ALLOCATION) { 1389 skipallocation = PETSC_TRUE; 1390 nz = 0; 1391 } 1392 1393 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1394 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 1395 if (nnz) { 1396 for (i=0; i<mbs; i++) { 1397 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 1398 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs); 1399 } 1400 } 1401 1402 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1403 if (!flg) { 1404 switch (bs) { 1405 case 1: 1406 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1407 B->ops->solve = MatSolve_SeqSBAIJ_1; 1408 B->ops->solves = MatSolves_SeqSBAIJ_1; 1409 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1410 B->ops->mult = MatMult_SeqSBAIJ_1; 1411 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1412 B->ops->multtranspose = MatMult_SeqSBAIJ_1; 1413 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_1; 1414 break; 1415 case 2: 1416 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1417 B->ops->solve = MatSolve_SeqSBAIJ_2; 1418 B->ops->solvetranspose = MatSolve_SeqSBAIJ_2; 1419 B->ops->mult = MatMult_SeqSBAIJ_2; 1420 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1421 B->ops->multtranspose = MatMult_SeqSBAIJ_2; 1422 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_2; 1423 break; 1424 case 3: 1425 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1426 B->ops->solve = MatSolve_SeqSBAIJ_3; 1427 B->ops->solvetranspose = MatSolve_SeqSBAIJ_3; 1428 B->ops->mult = MatMult_SeqSBAIJ_3; 1429 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1430 B->ops->multtranspose = MatMult_SeqSBAIJ_3; 1431 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_3; 1432 break; 1433 case 4: 1434 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1435 B->ops->solve = MatSolve_SeqSBAIJ_4; 1436 B->ops->solvetranspose = MatSolve_SeqSBAIJ_4; 1437 B->ops->mult = MatMult_SeqSBAIJ_4; 1438 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1439 B->ops->multtranspose = MatMult_SeqSBAIJ_4; 1440 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_4; 1441 break; 1442 case 5: 1443 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1444 B->ops->solve = MatSolve_SeqSBAIJ_5; 1445 B->ops->solvetranspose = MatSolve_SeqSBAIJ_5; 1446 B->ops->mult = MatMult_SeqSBAIJ_5; 1447 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1448 B->ops->multtranspose = MatMult_SeqSBAIJ_5; 1449 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_5; 1450 break; 1451 case 6: 1452 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1453 B->ops->solve = MatSolve_SeqSBAIJ_6; 1454 B->ops->solvetranspose = MatSolve_SeqSBAIJ_6; 1455 B->ops->mult = MatMult_SeqSBAIJ_6; 1456 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1457 B->ops->multtranspose = MatMult_SeqSBAIJ_6; 1458 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_6; 1459 break; 1460 case 7: 1461 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1462 B->ops->solve = MatSolve_SeqSBAIJ_7; 1463 B->ops->solvetranspose = MatSolve_SeqSBAIJ_7; 1464 B->ops->mult = MatMult_SeqSBAIJ_7; 1465 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1466 B->ops->multtranspose = MatMult_SeqSBAIJ_7; 1467 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_7; 1468 break; 1469 } 1470 } 1471 1472 b->mbs = mbs; 1473 b->nbs = mbs; 1474 if (!skipallocation) { 1475 /* b->ilen will count nonzeros in each block row so far. */ 1476 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 1477 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1478 if (!nnz) { 1479 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1480 else if (nz <= 0) nz = 1; 1481 for (i=0; i<mbs; i++) { 1482 b->imax[i] = nz; 1483 } 1484 nz = nz*mbs; /* total nz */ 1485 } else { 1486 nz = 0; 1487 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1488 } 1489 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1490 1491 /* allocate the matrix space */ 1492 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr); 1493 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1494 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1495 b->singlemalloc = PETSC_TRUE; 1496 1497 /* pointer to beginning of each row */ 1498 b->i[0] = 0; 1499 for (i=1; i<mbs+1; i++) { 1500 b->i[i] = b->i[i-1] + b->imax[i-1]; 1501 } 1502 b->free_a = PETSC_TRUE; 1503 b->free_ij = PETSC_TRUE; 1504 } else { 1505 b->free_a = PETSC_FALSE; 1506 b->free_ij = PETSC_FALSE; 1507 } 1508 1509 B->rmap.bs = bs; 1510 b->bs2 = bs2; 1511 b->nz = 0; 1512 b->maxnz = nz*bs2; 1513 1514 b->inew = 0; 1515 b->jnew = 0; 1516 b->anew = 0; 1517 b->a2anew = 0; 1518 b->permute = PETSC_FALSE; 1519 PetscFunctionReturn(0); 1520 } 1521 EXTERN_C_END 1522 1523 EXTERN_C_BEGIN 1524 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 1525 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqBAIJ(Mat, MatType,MatReuse,Mat*); 1526 EXTERN_C_END 1527 1528 /*MC 1529 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1530 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1531 1532 Options Database Keys: 1533 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1534 1535 Level: beginner 1536 1537 .seealso: MatCreateSeqSBAIJ 1538 M*/ 1539 1540 EXTERN_C_BEGIN 1541 #undef __FUNCT__ 1542 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1543 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqSBAIJ(Mat B) 1544 { 1545 Mat_SeqSBAIJ *b; 1546 PetscErrorCode ierr; 1547 PetscMPIInt size; 1548 PetscTruth flg; 1549 1550 PetscFunctionBegin; 1551 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1552 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1553 1554 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1555 B->data = (void*)b; 1556 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1557 B->ops->destroy = MatDestroy_SeqSBAIJ; 1558 B->ops->view = MatView_SeqSBAIJ; 1559 B->factor = 0; 1560 B->mapping = 0; 1561 b->row = 0; 1562 b->icol = 0; 1563 b->reallocs = 0; 1564 b->saved_values = 0; 1565 1566 1567 b->sorted = PETSC_FALSE; 1568 b->roworiented = PETSC_TRUE; 1569 b->nonew = 0; 1570 b->diag = 0; 1571 b->solve_work = 0; 1572 b->mult_work = 0; 1573 B->spptr = 0; 1574 b->keepzeroedrows = PETSC_FALSE; 1575 b->xtoy = 0; 1576 b->XtoY = 0; 1577 1578 b->inew = 0; 1579 b->jnew = 0; 1580 b->anew = 0; 1581 b->a2anew = 0; 1582 b->permute = PETSC_FALSE; 1583 1584 b->ignore_ltriangular = PETSC_FALSE; 1585 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_ignore_lower_triangular",&flg);CHKERRQ(ierr); 1586 if (flg) b->ignore_ltriangular = PETSC_TRUE; 1587 1588 b->getrow_utriangular = PETSC_FALSE; 1589 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_getrow_uppertriangular",&flg);CHKERRQ(ierr); 1590 if (flg) b->getrow_utriangular = PETSC_TRUE; 1591 1592 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1593 "MatStoreValues_SeqSBAIJ", 1594 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1595 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1596 "MatRetrieveValues_SeqSBAIJ", 1597 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1598 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1599 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1600 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1601 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1602 "MatConvert_SeqSBAIJ_SeqAIJ", 1603 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1604 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 1605 "MatConvert_SeqSBAIJ_SeqBAIJ", 1606 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 1607 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1608 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1609 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1610 1611 B->symmetric = PETSC_TRUE; 1612 B->structurally_symmetric = PETSC_TRUE; 1613 B->symmetric_set = PETSC_TRUE; 1614 B->structurally_symmetric_set = PETSC_TRUE; 1615 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSBAIJ);CHKERRQ(ierr); 1616 PetscFunctionReturn(0); 1617 } 1618 EXTERN_C_END 1619 1620 #undef __FUNCT__ 1621 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1622 /*@C 1623 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1624 compressed row) format. For good matrix assembly performance the 1625 user should preallocate the matrix storage by setting the parameter nz 1626 (or the array nnz). By setting these parameters accurately, performance 1627 during matrix assembly can be increased by more than a factor of 50. 1628 1629 Collective on Mat 1630 1631 Input Parameters: 1632 + A - the symmetric matrix 1633 . bs - size of block 1634 . nz - number of block nonzeros per block row (same for all rows) 1635 - nnz - array containing the number of block nonzeros in the upper triangular plus 1636 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1637 1638 Options Database Keys: 1639 . -mat_no_unroll - uses code that does not unroll the loops in the 1640 block calculations (much slower) 1641 . -mat_block_size - size of the blocks to use 1642 1643 Level: intermediate 1644 1645 Notes: 1646 Specify the preallocated storage with either nz or nnz (not both). 1647 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1648 allocation. For additional details, see the users manual chapter on 1649 matrices. 1650 1651 If the nnz parameter is given then the nz parameter is ignored 1652 1653 1654 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1655 @*/ 1656 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 1657 { 1658 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 1659 1660 PetscFunctionBegin; 1661 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1662 if (f) { 1663 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1664 } 1665 PetscFunctionReturn(0); 1666 } 1667 1668 #undef __FUNCT__ 1669 #define __FUNCT__ "MatCreateSeqSBAIJ" 1670 /*@C 1671 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1672 compressed row) format. For good matrix assembly performance the 1673 user should preallocate the matrix storage by setting the parameter nz 1674 (or the array nnz). By setting these parameters accurately, performance 1675 during matrix assembly can be increased by more than a factor of 50. 1676 1677 Collective on MPI_Comm 1678 1679 Input Parameters: 1680 + comm - MPI communicator, set to PETSC_COMM_SELF 1681 . bs - size of block 1682 . m - number of rows, or number of columns 1683 . nz - number of block nonzeros per block row (same for all rows) 1684 - nnz - array containing the number of block nonzeros in the upper triangular plus 1685 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1686 1687 Output Parameter: 1688 . A - the symmetric matrix 1689 1690 Options Database Keys: 1691 . -mat_no_unroll - uses code that does not unroll the loops in the 1692 block calculations (much slower) 1693 . -mat_block_size - size of the blocks to use 1694 1695 Level: intermediate 1696 1697 Notes: 1698 1699 Specify the preallocated storage with either nz or nnz (not both). 1700 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1701 allocation. For additional details, see the users manual chapter on 1702 matrices. 1703 1704 If the nnz parameter is given then the nz parameter is ignored 1705 1706 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1707 @*/ 1708 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1709 { 1710 PetscErrorCode ierr; 1711 1712 PetscFunctionBegin; 1713 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1714 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 1715 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1716 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 1717 PetscFunctionReturn(0); 1718 } 1719 1720 #undef __FUNCT__ 1721 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1722 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1723 { 1724 Mat C; 1725 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1726 PetscErrorCode ierr; 1727 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1728 1729 PetscFunctionBegin; 1730 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1731 1732 *B = 0; 1733 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 1734 ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr); 1735 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1736 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1737 c = (Mat_SeqSBAIJ*)C->data; 1738 1739 C->preallocated = PETSC_TRUE; 1740 C->factor = A->factor; 1741 c->row = 0; 1742 c->icol = 0; 1743 c->saved_values = 0; 1744 c->keepzeroedrows = a->keepzeroedrows; 1745 C->assembled = PETSC_TRUE; 1746 1747 ierr = PetscMapCopy(A->comm,&A->rmap,&C->rmap);CHKERRQ(ierr); 1748 ierr = PetscMapCopy(A->comm,&A->cmap,&C->cmap);CHKERRQ(ierr); 1749 c->bs2 = a->bs2; 1750 c->mbs = a->mbs; 1751 c->nbs = a->nbs; 1752 1753 ierr = PetscMalloc2((mbs+1),PetscInt,&c->imax,(mbs+1),PetscInt,&c->ilen);CHKERRQ(ierr); 1754 for (i=0; i<mbs; i++) { 1755 c->imax[i] = a->imax[i]; 1756 c->ilen[i] = a->ilen[i]; 1757 } 1758 1759 /* allocate the matrix space */ 1760 ierr = PetscMalloc3(bs2*nz,MatScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 1761 c->singlemalloc = PETSC_TRUE; 1762 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1763 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)));CHKERRQ(ierr); 1764 if (mbs > 0) { 1765 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1766 if (cpvalues == MAT_COPY_VALUES) { 1767 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1768 } else { 1769 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1770 } 1771 } 1772 1773 c->sorted = a->sorted; 1774 c->roworiented = a->roworiented; 1775 c->nonew = a->nonew; 1776 1777 if (a->diag) { 1778 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 1779 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1780 for (i=0; i<mbs; i++) { 1781 c->diag[i] = a->diag[i]; 1782 } 1783 } else c->diag = 0; 1784 c->nz = a->nz; 1785 c->maxnz = a->maxnz; 1786 c->solve_work = 0; 1787 c->mult_work = 0; 1788 c->free_a = PETSC_TRUE; 1789 c->free_ij = PETSC_TRUE; 1790 *B = C; 1791 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1792 PetscFunctionReturn(0); 1793 } 1794 1795 #undef __FUNCT__ 1796 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1797 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer, MatType type,Mat *A) 1798 { 1799 Mat_SeqSBAIJ *a; 1800 Mat B; 1801 PetscErrorCode ierr; 1802 int fd; 1803 PetscMPIInt size; 1804 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 1805 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1806 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 1807 PetscInt *masked,nmask,tmp,bs2,ishift; 1808 PetscScalar *aa; 1809 MPI_Comm comm = ((PetscObject)viewer)->comm; 1810 1811 PetscFunctionBegin; 1812 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1813 bs2 = bs*bs; 1814 1815 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1816 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1817 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1818 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1819 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1820 M = header[1]; N = header[2]; nz = header[3]; 1821 1822 if (header[3] < 0) { 1823 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1824 } 1825 1826 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1827 1828 /* 1829 This code adds extra rows to make sure the number of rows is 1830 divisible by the blocksize 1831 */ 1832 mbs = M/bs; 1833 extra_rows = bs - M + bs*(mbs); 1834 if (extra_rows == bs) extra_rows = 0; 1835 else mbs++; 1836 if (extra_rows) { 1837 ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 1838 } 1839 1840 /* read in row lengths */ 1841 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1842 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1843 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1844 1845 /* read in column indices */ 1846 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1847 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1848 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1849 1850 /* loop over row lengths determining block row lengths */ 1851 ierr = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr); 1852 ierr = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1853 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 1854 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1855 masked = mask + mbs; 1856 rowcount = 0; nzcount = 0; 1857 for (i=0; i<mbs; i++) { 1858 nmask = 0; 1859 for (j=0; j<bs; j++) { 1860 kmax = rowlengths[rowcount]; 1861 for (k=0; k<kmax; k++) { 1862 tmp = jj[nzcount++]/bs; /* block col. index */ 1863 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1864 } 1865 rowcount++; 1866 } 1867 s_browlengths[i] += nmask; 1868 1869 /* zero out the mask elements we set */ 1870 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1871 } 1872 1873 /* create our matrix */ 1874 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 1875 ierr = MatSetSizes(B,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 1876 ierr = MatSetType(B,type);CHKERRQ(ierr); 1877 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,s_browlengths);CHKERRQ(ierr); 1878 a = (Mat_SeqSBAIJ*)B->data; 1879 1880 /* set matrix "i" values */ 1881 a->i[0] = 0; 1882 for (i=1; i<= mbs; i++) { 1883 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1884 a->ilen[i-1] = s_browlengths[i-1]; 1885 } 1886 a->nz = a->i[mbs]; 1887 1888 /* read in nonzero values */ 1889 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1890 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1891 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1892 1893 /* set "a" and "j" values into matrix */ 1894 nzcount = 0; jcount = 0; 1895 for (i=0; i<mbs; i++) { 1896 nzcountb = nzcount; 1897 nmask = 0; 1898 for (j=0; j<bs; j++) { 1899 kmax = rowlengths[i*bs+j]; 1900 for (k=0; k<kmax; k++) { 1901 tmp = jj[nzcount++]/bs; /* block col. index */ 1902 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1903 } 1904 } 1905 /* sort the masked values */ 1906 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1907 1908 /* set "j" values into matrix */ 1909 maskcount = 1; 1910 for (j=0; j<nmask; j++) { 1911 a->j[jcount++] = masked[j]; 1912 mask[masked[j]] = maskcount++; 1913 } 1914 1915 /* set "a" values into matrix */ 1916 ishift = bs2*a->i[i]; 1917 for (j=0; j<bs; j++) { 1918 kmax = rowlengths[i*bs+j]; 1919 for (k=0; k<kmax; k++) { 1920 tmp = jj[nzcountb]/bs ; /* block col. index */ 1921 if (tmp >= i){ 1922 block = mask[tmp] - 1; 1923 point = jj[nzcountb] - bs*tmp; 1924 idx = ishift + bs2*block + j + bs*point; 1925 a->a[idx] = aa[nzcountb]; 1926 } 1927 nzcountb++; 1928 } 1929 } 1930 /* zero out the mask elements we set */ 1931 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1932 } 1933 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1934 1935 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1936 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1937 ierr = PetscFree(aa);CHKERRQ(ierr); 1938 ierr = PetscFree(jj);CHKERRQ(ierr); 1939 ierr = PetscFree(mask);CHKERRQ(ierr); 1940 1941 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1942 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1943 ierr = MatView_Private(B);CHKERRQ(ierr); 1944 *A = B; 1945 PetscFunctionReturn(0); 1946 } 1947 1948 #undef __FUNCT__ 1949 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1950 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1951 { 1952 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1953 MatScalar *aa=a->a,*v,*v1; 1954 PetscScalar *x,*b,*t,sum,d; 1955 PetscErrorCode ierr; 1956 PetscInt m=a->mbs,bs=A->rmap.bs,*ai=a->i,*aj=a->j; 1957 PetscInt nz,nz1,*vj,*vj1,i; 1958 1959 PetscFunctionBegin; 1960 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 1961 1962 its = its*lits; 1963 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1964 1965 if (bs > 1) 1966 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1967 1968 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1969 if (xx != bb) { 1970 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1971 } else { 1972 b = x; 1973 } 1974 1975 if (!a->relax_work) { 1976 ierr = PetscMalloc(m*sizeof(PetscScalar),&a->relax_work);CHKERRQ(ierr); 1977 } 1978 t = a->relax_work; 1979 1980 if (flag & SOR_ZERO_INITIAL_GUESS) { 1981 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1982 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 1983 1984 for (i=0; i<m; i++){ 1985 d = *(aa + ai[i]); /* diag[i] */ 1986 v = aa + ai[i] + 1; 1987 vj = aj + ai[i] + 1; 1988 nz = ai[i+1] - ai[i] - 1; 1989 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 1990 x[i] = omega*t[i]/d; 1991 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 1992 } 1993 } 1994 1995 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1996 if (!(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)){ 1997 t = b; 1998 } 1999 2000 for (i=m-1; i>=0; i--){ 2001 d = *(aa + ai[i]); 2002 v = aa + ai[i] + 1; 2003 vj = aj + ai[i] + 1; 2004 nz = ai[i+1] - ai[i] - 1; 2005 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2006 sum = t[i]; 2007 while (nz--) sum -= x[*vj++]*(*v++); 2008 x[i] = (1-omega)*x[i] + omega*sum/d; 2009 } 2010 t = a->relax_work; 2011 } 2012 its--; 2013 } 2014 2015 while (its--) { 2016 /* 2017 forward sweep: 2018 for i=0,...,m-1: 2019 sum[i] = (b[i] - U(i,:)x )/d[i]; 2020 x[i] = (1-omega)x[i] + omega*sum[i]; 2021 b = b - x[i]*U^T(i,:); 2022 2023 */ 2024 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2025 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 2026 2027 for (i=0; i<m; i++){ 2028 d = *(aa + ai[i]); /* diag[i] */ 2029 v = aa + ai[i] + 1; v1=v; 2030 vj = aj + ai[i] + 1; vj1=vj; 2031 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2032 sum = t[i]; 2033 ierr = PetscLogFlops(4*nz-2);CHKERRQ(ierr); 2034 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2035 x[i] = (1-omega)*x[i] + omega*sum/d; 2036 while (nz--) t[*vj++] -= x[i]*(*v++); 2037 } 2038 } 2039 2040 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2041 /* 2042 backward sweep: 2043 b = b - x[i]*U^T(i,:), i=0,...,n-2 2044 for i=m-1,...,0: 2045 sum[i] = (b[i] - U(i,:)x )/d[i]; 2046 x[i] = (1-omega)x[i] + omega*sum[i]; 2047 */ 2048 /* if there was a forward sweep done above then I thing the next two for loops are not needed */ 2049 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 2050 2051 for (i=0; i<m-1; i++){ /* update rhs */ 2052 v = aa + ai[i] + 1; 2053 vj = aj + ai[i] + 1; 2054 nz = ai[i+1] - ai[i] - 1; 2055 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2056 while (nz--) t[*vj++] -= x[i]*(*v++); 2057 } 2058 for (i=m-1; i>=0; i--){ 2059 d = *(aa + ai[i]); 2060 v = aa + ai[i] + 1; 2061 vj = aj + ai[i] + 1; 2062 nz = ai[i+1] - ai[i] - 1; 2063 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2064 sum = t[i]; 2065 while (nz--) sum -= x[*vj++]*(*v++); 2066 x[i] = (1-omega)*x[i] + omega*sum/d; 2067 } 2068 } 2069 } 2070 2071 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2072 if (bb != xx) { 2073 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2074 } 2075 PetscFunctionReturn(0); 2076 } 2077 2078 #undef __FUNCT__ 2079 #define __FUNCT__ "MatCreateSeqSBAIJWithArrays" 2080 /*@ 2081 MatCreateSeqSBAIJWithArrays - Creates an sequential SBAIJ matrix using matrix elements 2082 (upper triangular entries in CSR format) provided by the user. 2083 2084 Collective on MPI_Comm 2085 2086 Input Parameters: 2087 + comm - must be an MPI communicator of size 1 2088 . bs - size of block 2089 . m - number of rows 2090 . n - number of columns 2091 . i - row indices 2092 . j - column indices 2093 - a - matrix values 2094 2095 Output Parameter: 2096 . mat - the matrix 2097 2098 Level: intermediate 2099 2100 Notes: 2101 The i, j, and a arrays are not copied by this routine, the user must free these arrays 2102 once the matrix is destroyed 2103 2104 You cannot set new nonzero locations into this matrix, that will generate an error. 2105 2106 The i and j indices are 0 based 2107 2108 .seealso: MatCreate(), MatCreateMPISBAIJ(), MatCreateSeqSBAIJ() 2109 2110 @*/ 2111 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 2112 { 2113 PetscErrorCode ierr; 2114 PetscInt ii; 2115 Mat_SeqSBAIJ *sbaij; 2116 2117 PetscFunctionBegin; 2118 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 2119 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2120 2121 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2122 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 2123 ierr = MatSetType(*mat,MATSEQSBAIJ);CHKERRQ(ierr); 2124 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 2125 sbaij = (Mat_SeqSBAIJ*)(*mat)->data; 2126 ierr = PetscMalloc2(m,PetscInt,&sbaij->imax,m,PetscInt,&sbaij->ilen);CHKERRQ(ierr); 2127 2128 sbaij->i = i; 2129 sbaij->j = j; 2130 sbaij->a = a; 2131 sbaij->singlemalloc = PETSC_FALSE; 2132 sbaij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 2133 sbaij->free_a = PETSC_FALSE; 2134 sbaij->free_ij = PETSC_FALSE; 2135 2136 for (ii=0; ii<m; ii++) { 2137 sbaij->ilen[ii] = sbaij->imax[ii] = i[ii+1] - i[ii]; 2138 #if defined(PETSC_USE_DEBUG) 2139 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 2140 #endif 2141 } 2142 #if defined(PETSC_USE_DEBUG) 2143 for (ii=0; ii<sbaij->i[m]; ii++) { 2144 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 2145 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 2146 } 2147 #endif 2148 2149 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2150 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2151 PetscFunctionReturn(0); 2152 } 2153 2154 2155 2156 2157 2158