1 2 /* 3 Defines the basic matrix operations for the SBAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 7 #include "src/inline/spops.h" 8 #include "src/mat/impls/sbaij/seq/sbaij.h" 9 10 #define CHUNKSIZE 10 11 12 /* 13 Checks for missing diagonals 14 */ 15 #undef __FUNCT__ 16 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 17 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A) 18 { 19 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 20 int *diag,*jj = a->j,i,ierr; 21 22 PetscFunctionBegin; 23 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 24 diag = a->diag; 25 for (i=0; i<a->mbs; i++) { 26 if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i); 27 } 28 PetscFunctionReturn(0); 29 } 30 31 #undef __FUNCT__ 32 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 33 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A) 34 { 35 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 36 int i,mbs = a->mbs,ierr; 37 38 PetscFunctionBegin; 39 if (a->diag) PetscFunctionReturn(0); 40 41 ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr); 42 PetscLogObjectMemory(A,(mbs+1)*sizeof(int)); 43 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 44 PetscFunctionReturn(0); 45 } 46 47 #undef __FUNCT__ 48 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 49 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 50 { 51 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 52 int n = a->mbs,i; 53 54 PetscFunctionBegin; 55 *nn = n; 56 if (!ia) PetscFunctionReturn(0); 57 58 if (oshift == 1) { 59 /* temporarily add 1 to i and j indices */ 60 int nz = a->i[n]; 61 for (i=0; i<nz; i++) a->j[i]++; 62 for (i=0; i<n+1; i++) a->i[i]++; 63 *ia = a->i; *ja = a->j; 64 } else { 65 *ia = a->i; *ja = a->j; 66 } 67 PetscFunctionReturn(0); 68 } 69 70 #undef __FUNCT__ 71 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 72 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 73 { 74 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 75 int i,n = a->mbs; 76 77 PetscFunctionBegin; 78 if (!ia) PetscFunctionReturn(0); 79 80 if (oshift == 1) { 81 int nz = a->i[n]-1; 82 for (i=0; i<nz; i++) a->j[i]--; 83 for (i=0; i<n+1; i++) a->i[i]--; 84 } 85 PetscFunctionReturn(0); 86 } 87 88 #undef __FUNCT__ 89 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ" 90 PetscErrorCode MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs) 91 { 92 Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data; 93 94 PetscFunctionBegin; 95 *bs = sbaij->bs; 96 PetscFunctionReturn(0); 97 } 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 101 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A) 102 { 103 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 104 PetscErrorCode ierr; 105 106 PetscFunctionBegin; 107 #if defined(PETSC_USE_LOG) 108 PetscLogObjectState((PetscObject)A,"Rows=%d, NZ=%d",A->m,a->nz); 109 #endif 110 ierr = PetscFree(a->a);CHKERRQ(ierr); 111 if (!a->singlemalloc) { 112 ierr = PetscFree(a->i);CHKERRQ(ierr); 113 ierr = PetscFree(a->j);CHKERRQ(ierr); 114 } 115 if (a->row) { 116 ierr = ISDestroy(a->row);CHKERRQ(ierr); 117 } 118 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 119 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 120 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 121 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 122 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 123 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 124 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 125 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 126 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 127 128 if (a->inew){ 129 ierr = PetscFree(a->inew);CHKERRQ(ierr); 130 a->inew = 0; 131 } 132 ierr = PetscFree(a);CHKERRQ(ierr); 133 PetscFunctionReturn(0); 134 } 135 136 #undef __FUNCT__ 137 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 138 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op) 139 { 140 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 141 142 PetscFunctionBegin; 143 switch (op) { 144 case MAT_ROW_ORIENTED: 145 a->roworiented = PETSC_TRUE; 146 break; 147 case MAT_COLUMN_ORIENTED: 148 a->roworiented = PETSC_FALSE; 149 break; 150 case MAT_COLUMNS_SORTED: 151 a->sorted = PETSC_TRUE; 152 break; 153 case MAT_COLUMNS_UNSORTED: 154 a->sorted = PETSC_FALSE; 155 break; 156 case MAT_KEEP_ZEROED_ROWS: 157 a->keepzeroedrows = PETSC_TRUE; 158 break; 159 case MAT_NO_NEW_NONZERO_LOCATIONS: 160 a->nonew = 1; 161 break; 162 case MAT_NEW_NONZERO_LOCATION_ERR: 163 a->nonew = -1; 164 break; 165 case MAT_NEW_NONZERO_ALLOCATION_ERR: 166 a->nonew = -2; 167 break; 168 case MAT_YES_NEW_NONZERO_LOCATIONS: 169 a->nonew = 0; 170 break; 171 case MAT_ROWS_SORTED: 172 case MAT_ROWS_UNSORTED: 173 case MAT_YES_NEW_DIAGONALS: 174 case MAT_IGNORE_OFF_PROC_ENTRIES: 175 case MAT_USE_HASH_TABLE: 176 PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n"); 177 break; 178 case MAT_NO_NEW_DIAGONALS: 179 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 180 case MAT_NOT_SYMMETRIC: 181 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 182 case MAT_HERMITIAN: 183 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 184 case MAT_SYMMETRIC: 185 case MAT_STRUCTURALLY_SYMMETRIC: 186 case MAT_NOT_HERMITIAN: 187 case MAT_SYMMETRY_ETERNAL: 188 case MAT_NOT_SYMMETRY_ETERNAL: 189 break; 190 default: 191 SETERRQ(PETSC_ERR_SUP,"unknown option"); 192 } 193 PetscFunctionReturn(0); 194 } 195 196 #undef __FUNCT__ 197 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 198 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v) 199 { 200 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 201 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr; 202 MatScalar *aa,*aa_i; 203 PetscScalar *v_i; 204 205 PetscFunctionBegin; 206 bs = a->bs; 207 ai = a->i; 208 aj = a->j; 209 aa = a->a; 210 bs2 = a->bs2; 211 212 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %d out of range", row); 213 214 bn = row/bs; /* Block number */ 215 bp = row % bs; /* Block position */ 216 M = ai[bn+1] - ai[bn]; 217 *ncols = bs*M; 218 219 if (v) { 220 *v = 0; 221 if (*ncols) { 222 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 223 for (i=0; i<M; i++) { /* for each block in the block row */ 224 v_i = *v + i*bs; 225 aa_i = aa + bs2*(ai[bn] + i); 226 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 227 } 228 } 229 } 230 231 if (cols) { 232 *cols = 0; 233 if (*ncols) { 234 ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr); 235 for (i=0; i<M; i++) { /* for each block in the block row */ 236 cols_i = *cols + i*bs; 237 itmp = bs*aj[ai[bn] + i]; 238 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 239 } 240 } 241 } 242 243 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 244 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 245 #ifdef column_search 246 v_i = *v + M*bs; 247 cols_i = *cols + M*bs; 248 for (i=0; i<bn; i++){ /* for each block row */ 249 M = ai[i+1] - ai[i]; 250 for (j=0; j<M; j++){ 251 itmp = aj[ai[i] + j]; /* block column value */ 252 if (itmp == bn){ 253 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 254 for (k=0; k<bs; k++) { 255 *cols_i++ = i*bs+k; 256 *v_i++ = aa_i[k]; 257 } 258 *ncols += bs; 259 break; 260 } 261 } 262 } 263 #endif 264 265 PetscFunctionReturn(0); 266 } 267 268 #undef __FUNCT__ 269 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 270 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 271 { 272 PetscErrorCode ierr; 273 274 PetscFunctionBegin; 275 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 276 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 277 PetscFunctionReturn(0); 278 } 279 280 #undef __FUNCT__ 281 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 282 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B) 283 { 284 PetscErrorCode ierr; 285 PetscFunctionBegin; 286 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 287 PetscFunctionReturn(0); 288 } 289 290 #undef __FUNCT__ 291 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 292 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 293 { 294 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 295 PetscErrorCode ierr; 296 int i,j,bs = a->bs,k,l,bs2=a->bs2; 297 char *name; 298 PetscViewerFormat format; 299 300 PetscFunctionBegin; 301 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 302 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 303 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 304 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 305 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 306 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 307 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 308 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 309 for (i=0; i<a->mbs; i++) { 310 for (j=0; j<bs; j++) { 311 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 312 for (k=a->i[i]; k<a->i[i+1]; k++) { 313 for (l=0; l<bs; l++) { 314 #if defined(PETSC_USE_COMPLEX) 315 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 316 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 317 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 318 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 319 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 320 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 321 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 322 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 323 } 324 #else 325 if (a->a[bs2*k + l*bs + j] != 0.0) { 326 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 327 } 328 #endif 329 } 330 } 331 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 332 } 333 } 334 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 335 } else { 336 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 337 for (i=0; i<a->mbs; i++) { 338 for (j=0; j<bs; j++) { 339 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 340 for (k=a->i[i]; k<a->i[i+1]; k++) { 341 for (l=0; l<bs; l++) { 342 #if defined(PETSC_USE_COMPLEX) 343 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 344 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 345 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 346 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 347 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 348 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 349 } else { 350 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 351 } 352 #else 353 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 354 #endif 355 } 356 } 357 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 358 } 359 } 360 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 361 } 362 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 363 PetscFunctionReturn(0); 364 } 365 366 #undef __FUNCT__ 367 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 368 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 369 { 370 Mat A = (Mat) Aa; 371 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 372 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 373 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 374 MatScalar *aa; 375 MPI_Comm comm; 376 PetscViewer viewer; 377 378 PetscFunctionBegin; 379 /* 380 This is nasty. If this is called from an originally parallel matrix 381 then all processes call this,but only the first has the matrix so the 382 rest should return immediately. 383 */ 384 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 385 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 386 if (rank) PetscFunctionReturn(0); 387 388 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 389 390 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 391 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 392 393 /* loop over matrix elements drawing boxes */ 394 color = PETSC_DRAW_BLUE; 395 for (i=0,row=0; i<mbs; i++,row+=bs) { 396 for (j=a->i[i]; j<a->i[i+1]; j++) { 397 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 398 x_l = a->j[j]*bs; x_r = x_l + 1.0; 399 aa = a->a + j*bs2; 400 for (k=0; k<bs; k++) { 401 for (l=0; l<bs; l++) { 402 if (PetscRealPart(*aa++) >= 0.) continue; 403 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 404 } 405 } 406 } 407 } 408 color = PETSC_DRAW_CYAN; 409 for (i=0,row=0; i<mbs; i++,row+=bs) { 410 for (j=a->i[i]; j<a->i[i+1]; j++) { 411 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 412 x_l = a->j[j]*bs; x_r = x_l + 1.0; 413 aa = a->a + j*bs2; 414 for (k=0; k<bs; k++) { 415 for (l=0; l<bs; l++) { 416 if (PetscRealPart(*aa++) != 0.) continue; 417 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 418 } 419 } 420 } 421 } 422 423 color = PETSC_DRAW_RED; 424 for (i=0,row=0; i<mbs; i++,row+=bs) { 425 for (j=a->i[i]; j<a->i[i+1]; j++) { 426 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 427 x_l = a->j[j]*bs; x_r = x_l + 1.0; 428 aa = a->a + j*bs2; 429 for (k=0; k<bs; k++) { 430 for (l=0; l<bs; l++) { 431 if (PetscRealPart(*aa++) <= 0.) continue; 432 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 433 } 434 } 435 } 436 } 437 PetscFunctionReturn(0); 438 } 439 440 #undef __FUNCT__ 441 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 442 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 443 { 444 PetscErrorCode ierr; 445 PetscReal xl,yl,xr,yr,w,h; 446 PetscDraw draw; 447 PetscTruth isnull; 448 449 PetscFunctionBegin; 450 451 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 452 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 453 454 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 455 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 456 xr += w; yr += h; xl = -w; yl = -h; 457 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 458 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 459 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 460 PetscFunctionReturn(0); 461 } 462 463 #undef __FUNCT__ 464 #define __FUNCT__ "MatView_SeqSBAIJ" 465 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 466 { 467 PetscErrorCode ierr; 468 PetscTruth iascii,isdraw; 469 470 PetscFunctionBegin; 471 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 472 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 473 if (iascii){ 474 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 475 } else if (isdraw) { 476 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 477 } else { 478 Mat B; 479 ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr); 480 ierr = MatView(B,viewer);CHKERRQ(ierr); 481 ierr = MatDestroy(B);CHKERRQ(ierr); 482 } 483 PetscFunctionReturn(0); 484 } 485 486 487 #undef __FUNCT__ 488 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 489 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[]) 490 { 491 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 492 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 493 int *ai = a->i,*ailen = a->ilen; 494 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 495 MatScalar *ap,*aa = a->a,zero = 0.0; 496 497 PetscFunctionBegin; 498 for (k=0; k<m; k++) { /* loop over rows */ 499 row = im[k]; brow = row/bs; 500 if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row); 501 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 502 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 503 nrow = ailen[brow]; 504 for (l=0; l<n; l++) { /* loop over columns */ 505 if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",in[l]); 506 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1); 507 col = in[l] ; 508 bcol = col/bs; 509 cidx = col%bs; 510 ridx = row%bs; 511 high = nrow; 512 low = 0; /* assume unsorted */ 513 while (high-low > 5) { 514 t = (low+high)/2; 515 if (rp[t] > bcol) high = t; 516 else low = t; 517 } 518 for (i=low; i<high; i++) { 519 if (rp[i] > bcol) break; 520 if (rp[i] == bcol) { 521 *v++ = ap[bs2*i+bs*cidx+ridx]; 522 goto finished; 523 } 524 } 525 *v++ = zero; 526 finished:; 527 } 528 } 529 PetscFunctionReturn(0); 530 } 531 532 533 #undef __FUNCT__ 534 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 535 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 536 { 537 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 538 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 539 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 540 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 541 PetscTruth roworiented=a->roworiented; 542 const MatScalar *value = v; 543 MatScalar *ap,*aa = a->a,*bap; 544 545 PetscFunctionBegin; 546 if (roworiented) { 547 stepval = (n-1)*bs; 548 } else { 549 stepval = (m-1)*bs; 550 } 551 for (k=0; k<m; k++) { /* loop over added rows */ 552 row = im[k]; 553 if (row < 0) continue; 554 #if defined(PETSC_USE_BOPT_g) 555 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1); 556 #endif 557 rp = aj + ai[row]; 558 ap = aa + bs2*ai[row]; 559 rmax = imax[row]; 560 nrow = ailen[row]; 561 low = 0; 562 for (l=0; l<n; l++) { /* loop over added columns */ 563 if (in[l] < 0) continue; 564 col = in[l]; 565 #if defined(PETSC_USE_BOPT_g) 566 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",col,a->nbs-1); 567 #endif 568 if (col < row) continue; /* ignore lower triangular block */ 569 if (roworiented) { 570 value = v + k*(stepval+bs)*bs + l*bs; 571 } else { 572 value = v + l*(stepval+bs)*bs + k*bs; 573 } 574 if (!sorted) low = 0; high = nrow; 575 while (high-low > 7) { 576 t = (low+high)/2; 577 if (rp[t] > col) high = t; 578 else low = t; 579 } 580 for (i=low; i<high; i++) { 581 if (rp[i] > col) break; 582 if (rp[i] == col) { 583 bap = ap + bs2*i; 584 if (roworiented) { 585 if (is == ADD_VALUES) { 586 for (ii=0; ii<bs; ii++,value+=stepval) { 587 for (jj=ii; jj<bs2; jj+=bs) { 588 bap[jj] += *value++; 589 } 590 } 591 } else { 592 for (ii=0; ii<bs; ii++,value+=stepval) { 593 for (jj=ii; jj<bs2; jj+=bs) { 594 bap[jj] = *value++; 595 } 596 } 597 } 598 } else { 599 if (is == ADD_VALUES) { 600 for (ii=0; ii<bs; ii++,value+=stepval) { 601 for (jj=0; jj<bs; jj++) { 602 *bap++ += *value++; 603 } 604 } 605 } else { 606 for (ii=0; ii<bs; ii++,value+=stepval) { 607 for (jj=0; jj<bs; jj++) { 608 *bap++ = *value++; 609 } 610 } 611 } 612 } 613 goto noinsert2; 614 } 615 } 616 if (nonew == 1) goto noinsert2; 617 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 618 if (nrow >= rmax) { 619 /* there is no extra room in row, therefore enlarge */ 620 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 621 MatScalar *new_a; 622 623 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 624 625 /* malloc new storage space */ 626 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 627 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 628 new_j = (int*)(new_a + bs2*new_nz); 629 new_i = new_j + new_nz; 630 631 /* copy over old data into new slots */ 632 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 633 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 634 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 635 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 636 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 637 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 638 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 639 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 640 /* free up old matrix storage */ 641 ierr = PetscFree(a->a);CHKERRQ(ierr); 642 if (!a->singlemalloc) { 643 ierr = PetscFree(a->i);CHKERRQ(ierr); 644 ierr = PetscFree(a->j);CHKERRQ(ierr); 645 } 646 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 647 a->singlemalloc = PETSC_TRUE; 648 649 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 650 rmax = imax[row] = imax[row] + CHUNKSIZE; 651 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 652 a->maxnz += bs2*CHUNKSIZE; 653 a->reallocs++; 654 a->nz++; 655 } 656 N = nrow++ - 1; 657 /* shift up all the later entries in this row */ 658 for (ii=N; ii>=i; ii--) { 659 rp[ii+1] = rp[ii]; 660 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 661 } 662 if (N >= i) { 663 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 664 } 665 rp[i] = col; 666 bap = ap + bs2*i; 667 if (roworiented) { 668 for (ii=0; ii<bs; ii++,value+=stepval) { 669 for (jj=ii; jj<bs2; jj+=bs) { 670 bap[jj] = *value++; 671 } 672 } 673 } else { 674 for (ii=0; ii<bs; ii++,value+=stepval) { 675 for (jj=0; jj<bs; jj++) { 676 *bap++ = *value++; 677 } 678 } 679 } 680 noinsert2:; 681 low = i; 682 } 683 ailen[row] = nrow; 684 } 685 PetscFunctionReturn(0); 686 } 687 688 #undef __FUNCT__ 689 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 690 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 691 { 692 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 693 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 694 int m = A->m,*ip,N,*ailen = a->ilen; 695 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 696 MatScalar *aa = a->a,*ap; 697 698 PetscFunctionBegin; 699 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 700 701 if (m) rmax = ailen[0]; 702 for (i=1; i<mbs; i++) { 703 /* move each row back by the amount of empty slots (fshift) before it*/ 704 fshift += imax[i-1] - ailen[i-1]; 705 rmax = PetscMax(rmax,ailen[i]); 706 if (fshift) { 707 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 708 N = ailen[i]; 709 for (j=0; j<N; j++) { 710 ip[j-fshift] = ip[j]; 711 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 712 } 713 } 714 ai[i] = ai[i-1] + ailen[i-1]; 715 } 716 if (mbs) { 717 fshift += imax[mbs-1] - ailen[mbs-1]; 718 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 719 } 720 /* reset ilen and imax for each row */ 721 for (i=0; i<mbs; i++) { 722 ailen[i] = imax[i] = ai[i+1] - ai[i]; 723 } 724 a->nz = ai[mbs]; 725 726 /* diagonals may have moved, reset it */ 727 if (a->diag) { 728 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr); 729 } 730 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 731 m,A->m,a->bs,fshift*bs2,a->nz*bs2); 732 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n", 733 a->reallocs); 734 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 735 a->reallocs = 0; 736 A->info.nz_unneeded = (PetscReal)fshift*bs2; 737 738 PetscFunctionReturn(0); 739 } 740 741 /* 742 This function returns an array of flags which indicate the locations of contiguous 743 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 744 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 745 Assume: sizes should be long enough to hold all the values. 746 */ 747 #undef __FUNCT__ 748 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 749 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 750 { 751 int i,j,k,row; 752 PetscTruth flg; 753 754 PetscFunctionBegin; 755 for (i=0,j=0; i<n; j++) { 756 row = idx[i]; 757 if (row%bs!=0) { /* Not the begining of a block */ 758 sizes[j] = 1; 759 i++; 760 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 761 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 762 i++; 763 } else { /* Begining of the block, so check if the complete block exists */ 764 flg = PETSC_TRUE; 765 for (k=1; k<bs; k++) { 766 if (row+k != idx[i+k]) { /* break in the block */ 767 flg = PETSC_FALSE; 768 break; 769 } 770 } 771 if (flg == PETSC_TRUE) { /* No break in the bs */ 772 sizes[j] = bs; 773 i+= bs; 774 } else { 775 sizes[j] = 1; 776 i++; 777 } 778 } 779 } 780 *bs_max = j; 781 PetscFunctionReturn(0); 782 } 783 784 #undef __FUNCT__ 785 #define __FUNCT__ "MatZeroRows_SeqSBAIJ" 786 PetscErrorCode MatZeroRows_SeqSBAIJ(Mat A,IS is,const PetscScalar *diag) 787 { 788 PetscFunctionBegin; 789 SETERRQ(PETSC_ERR_SUP,"No support for this function yet"); 790 } 791 792 /* Only add/insert a(i,j) with i<=j (blocks). 793 Any a(i,j) with i>j input by user is ingored. 794 */ 795 796 #undef __FUNCT__ 797 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 798 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 799 { 800 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 801 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 802 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 803 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 804 int ridx,cidx,bs2=a->bs2,ierr; 805 MatScalar *ap,value,*aa=a->a,*bap; 806 807 PetscFunctionBegin; 808 809 for (k=0; k<m; k++) { /* loop over added rows */ 810 row = im[k]; /* row number */ 811 brow = row/bs; /* block row number */ 812 if (row < 0) continue; 813 #if defined(PETSC_USE_BOPT_g) 814 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 815 #endif 816 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 817 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 818 rmax = imax[brow]; /* maximum space allocated for this row */ 819 nrow = ailen[brow]; /* actual length of this row */ 820 low = 0; 821 822 for (l=0; l<n; l++) { /* loop over added columns */ 823 if (in[l] < 0) continue; 824 #if defined(PETSC_USE_BOPT_g) 825 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m-1); 826 #endif 827 col = in[l]; 828 bcol = col/bs; /* block col number */ 829 830 if (brow <= bcol){ 831 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 832 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 833 /* element value a(k,l) */ 834 if (roworiented) { 835 value = v[l + k*n]; 836 } else { 837 value = v[k + l*m]; 838 } 839 840 /* move pointer bap to a(k,l) quickly and add/insert value */ 841 if (!sorted) low = 0; high = nrow; 842 while (high-low > 7) { 843 t = (low+high)/2; 844 if (rp[t] > bcol) high = t; 845 else low = t; 846 } 847 for (i=low; i<high; i++) { 848 /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */ 849 if (rp[i] > bcol) break; 850 if (rp[i] == bcol) { 851 bap = ap + bs2*i + bs*cidx + ridx; 852 if (is == ADD_VALUES) *bap += value; 853 else *bap = value; 854 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 855 if (brow == bcol && ridx < cidx){ 856 bap = ap + bs2*i + bs*ridx + cidx; 857 if (is == ADD_VALUES) *bap += value; 858 else *bap = value; 859 } 860 goto noinsert1; 861 } 862 } 863 864 if (nonew == 1) goto noinsert1; 865 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 866 if (nrow >= rmax) { 867 /* there is no extra room in row, therefore enlarge */ 868 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 869 MatScalar *new_a; 870 871 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 872 873 /* Malloc new storage space */ 874 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 875 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 876 new_j = (int*)(new_a + bs2*new_nz); 877 new_i = new_j + new_nz; 878 879 /* copy over old data into new slots */ 880 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 881 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 882 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 883 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 884 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 885 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 886 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 887 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 888 /* free up old matrix storage */ 889 ierr = PetscFree(a->a);CHKERRQ(ierr); 890 if (!a->singlemalloc) { 891 ierr = PetscFree(a->i);CHKERRQ(ierr); 892 ierr = PetscFree(a->j);CHKERRQ(ierr); 893 } 894 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 895 a->singlemalloc = PETSC_TRUE; 896 897 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 898 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 899 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 900 a->maxnz += bs2*CHUNKSIZE; 901 a->reallocs++; 902 a->nz++; 903 } 904 905 N = nrow++ - 1; 906 /* shift up all the later entries in this row */ 907 for (ii=N; ii>=i; ii--) { 908 rp[ii+1] = rp[ii]; 909 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 910 } 911 if (N>=i) { 912 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 913 } 914 rp[i] = bcol; 915 ap[bs2*i + bs*cidx + ridx] = value; 916 noinsert1:; 917 low = i; 918 /* } */ 919 } 920 } /* end of if .. if.. */ 921 } /* end of loop over added columns */ 922 ailen[brow] = nrow; 923 } /* end of loop over added rows */ 924 925 PetscFunctionReturn(0); 926 } 927 928 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*); 929 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*); 930 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS[],int); 931 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 932 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat*[]); 933 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec); 934 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec); 935 extern int MatScale_SeqSBAIJ(const PetscScalar*,Mat); 936 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 937 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 938 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec); 939 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 940 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 941 extern int MatZeroEntries_SeqSBAIJ(Mat); 942 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec); 943 944 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 945 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 946 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 947 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 948 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 949 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 950 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 951 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 952 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec); 953 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec); 954 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec); 955 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec); 956 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec); 957 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec); 958 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec); 959 960 extern int MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs); 961 962 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 963 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 964 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 965 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 966 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 967 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 968 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 969 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 970 971 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*); 972 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*); 973 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*); 974 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*); 975 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*); 976 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*); 977 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*); 978 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*); 979 extern int MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*); 980 981 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 982 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 983 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 984 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 985 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 986 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 987 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 988 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 989 990 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 991 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 992 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 993 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 994 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 995 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 996 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 997 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 998 999 #ifdef HAVE_MatICCFactor 1000 /* modefied from MatILUFactor_SeqSBAIJ, needs further work! */ 1001 #undef __FUNCT__ 1002 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 1003 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 1004 { 1005 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 1006 Mat outA; 1007 PetscErrorCode ierr; 1008 PetscTruth row_identity,col_identity; 1009 1010 PetscFunctionBegin; 1011 /* 1012 if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1013 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1014 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1015 if (!row_identity || !col_identity) { 1016 SETERRQ(1,"Row and column permutations must be identity for in-place ICC"); 1017 } 1018 */ 1019 1020 outA = inA; 1021 inA->factor = FACTOR_CHOLESKY; 1022 1023 if (!a->diag) { 1024 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 1025 } 1026 /* 1027 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1028 for ILU(0) factorization with natural ordering 1029 */ 1030 switch (a->bs) { 1031 case 1: 1032 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1033 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1034 inA->ops->solves = MatSolves_SeqSBAIJ_1; 1035 PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1036 case 2: 1037 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1038 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1039 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1040 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1041 break; 1042 case 3: 1043 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1044 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1045 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1046 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1047 break; 1048 case 4: 1049 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1050 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1051 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1052 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1053 break; 1054 case 5: 1055 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1056 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1057 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1058 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1059 break; 1060 case 6: 1061 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1062 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1063 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1064 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1065 break; 1066 case 7: 1067 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1068 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1069 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1070 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1071 break; 1072 default: 1073 a->row = row; 1074 a->icol = col; 1075 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1076 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1077 1078 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1079 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1080 PetscLogObjectParent(inA,a->icol); 1081 1082 if (!a->solve_work) { 1083 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1084 PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar)); 1085 } 1086 } 1087 1088 ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr); 1089 1090 PetscFunctionReturn(0); 1091 } 1092 #endif 1093 1094 #undef __FUNCT__ 1095 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1096 PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat A) 1097 { 1098 static PetscTruth called = PETSC_FALSE; 1099 MPI_Comm comm = A->comm; 1100 PetscErrorCode ierr; 1101 1102 PetscFunctionBegin; 1103 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1104 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1105 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1106 PetscFunctionReturn(0); 1107 } 1108 1109 EXTERN_C_BEGIN 1110 #undef __FUNCT__ 1111 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1112 PetscErrorCode MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices) 1113 { 1114 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1115 int i,nz,n; 1116 1117 PetscFunctionBegin; 1118 nz = baij->maxnz; 1119 n = mat->n; 1120 for (i=0; i<nz; i++) { 1121 baij->j[i] = indices[i]; 1122 } 1123 baij->nz = nz; 1124 for (i=0; i<n; i++) { 1125 baij->ilen[i] = baij->imax[i]; 1126 } 1127 1128 PetscFunctionReturn(0); 1129 } 1130 EXTERN_C_END 1131 1132 #undef __FUNCT__ 1133 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1134 /*@ 1135 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1136 in the matrix. 1137 1138 Input Parameters: 1139 + mat - the SeqSBAIJ matrix 1140 - indices - the column indices 1141 1142 Level: advanced 1143 1144 Notes: 1145 This can be called if you have precomputed the nonzero structure of the 1146 matrix and want to provide it to the matrix object to improve the performance 1147 of the MatSetValues() operation. 1148 1149 You MUST have set the correct numbers of nonzeros per row in the call to 1150 MatCreateSeqSBAIJ(). 1151 1152 MUST be called before any calls to MatSetValues() 1153 1154 .seealso: MatCreateSeqSBAIJ 1155 @*/ 1156 PetscErrorCode MatSeqSBAIJSetColumnIndices(Mat mat,int *indices) 1157 { 1158 PetscErrorCode ierr,(*f)(Mat,int *); 1159 1160 PetscFunctionBegin; 1161 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1162 PetscValidPointer(indices,2); 1163 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1164 if (f) { 1165 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1166 } else { 1167 SETERRQ(1,"Wrong type of matrix to set column indices"); 1168 } 1169 PetscFunctionReturn(0); 1170 } 1171 1172 #undef __FUNCT__ 1173 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1174 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A) 1175 { 1176 PetscErrorCode ierr; 1177 1178 PetscFunctionBegin; 1179 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1180 PetscFunctionReturn(0); 1181 } 1182 1183 #undef __FUNCT__ 1184 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1185 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1186 { 1187 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1188 PetscFunctionBegin; 1189 *array = a->a; 1190 PetscFunctionReturn(0); 1191 } 1192 1193 #undef __FUNCT__ 1194 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1195 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1196 { 1197 PetscFunctionBegin; 1198 PetscFunctionReturn(0); 1199 } 1200 1201 #include "petscblaslapack.h" 1202 #undef __FUNCT__ 1203 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1204 PetscErrorCode MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1205 { 1206 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1207 PetscErrorCode ierr; 1208 int one=1,i,bs=y->bs,bs2,j; 1209 1210 PetscFunctionBegin; 1211 if (str == SAME_NONZERO_PATTERN) { 1212 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1213 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1214 if (y->xtoy && y->XtoY != X) { 1215 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1216 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1217 } 1218 if (!y->xtoy) { /* get xtoy */ 1219 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1220 y->XtoY = X; 1221 } 1222 bs2 = bs*bs; 1223 for (i=0; i<x->nz; i++) { 1224 j = 0; 1225 while (j < bs2){ 1226 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1227 j++; 1228 } 1229 } 1230 PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)); 1231 } else { 1232 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1233 } 1234 PetscFunctionReturn(0); 1235 } 1236 1237 #undef __FUNCT__ 1238 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1239 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1240 { 1241 PetscFunctionBegin; 1242 *flg = PETSC_TRUE; 1243 PetscFunctionReturn(0); 1244 } 1245 1246 #undef __FUNCT__ 1247 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1248 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1249 { 1250 PetscFunctionBegin; 1251 *flg = PETSC_TRUE; 1252 PetscFunctionReturn(0); 1253 } 1254 1255 #undef __FUNCT__ 1256 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1257 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg) 1258 { 1259 PetscFunctionBegin; 1260 *flg = PETSC_FALSE; 1261 PetscFunctionReturn(0); 1262 } 1263 1264 /* -------------------------------------------------------------------*/ 1265 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1266 MatGetRow_SeqSBAIJ, 1267 MatRestoreRow_SeqSBAIJ, 1268 MatMult_SeqSBAIJ_N, 1269 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1270 MatMultTranspose_SeqSBAIJ, 1271 MatMultTransposeAdd_SeqSBAIJ, 1272 MatSolve_SeqSBAIJ_N, 1273 0, 1274 0, 1275 /*10*/ 0, 1276 0, 1277 MatCholeskyFactor_SeqSBAIJ, 1278 MatRelax_SeqSBAIJ, 1279 MatTranspose_SeqSBAIJ, 1280 /*15*/ MatGetInfo_SeqSBAIJ, 1281 MatEqual_SeqSBAIJ, 1282 MatGetDiagonal_SeqSBAIJ, 1283 MatDiagonalScale_SeqSBAIJ, 1284 MatNorm_SeqSBAIJ, 1285 /*20*/ 0, 1286 MatAssemblyEnd_SeqSBAIJ, 1287 0, 1288 MatSetOption_SeqSBAIJ, 1289 MatZeroEntries_SeqSBAIJ, 1290 /*25*/ MatZeroRows_SeqSBAIJ, 1291 0, 1292 0, 1293 MatCholeskyFactorSymbolic_SeqSBAIJ, 1294 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1295 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1296 0, 1297 MatICCFactorSymbolic_SeqSBAIJ, 1298 MatGetArray_SeqSBAIJ, 1299 MatRestoreArray_SeqSBAIJ, 1300 /*35*/ MatDuplicate_SeqSBAIJ, 1301 0, 1302 0, 1303 0, 1304 0, 1305 /*40*/ MatAXPY_SeqSBAIJ, 1306 MatGetSubMatrices_SeqSBAIJ, 1307 MatIncreaseOverlap_SeqSBAIJ, 1308 MatGetValues_SeqSBAIJ, 1309 0, 1310 /*45*/ MatPrintHelp_SeqSBAIJ, 1311 MatScale_SeqSBAIJ, 1312 0, 1313 0, 1314 0, 1315 /*50*/ MatGetBlockSize_SeqSBAIJ, 1316 MatGetRowIJ_SeqSBAIJ, 1317 MatRestoreRowIJ_SeqSBAIJ, 1318 0, 1319 0, 1320 /*55*/ 0, 1321 0, 1322 0, 1323 0, 1324 MatSetValuesBlocked_SeqSBAIJ, 1325 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1326 0, 1327 0, 1328 MatGetPetscMaps_Petsc, 1329 0, 1330 /*65*/ 0, 1331 0, 1332 0, 1333 0, 1334 0, 1335 /*70*/ MatGetRowMax_SeqSBAIJ, 1336 0, 1337 0, 1338 0, 1339 0, 1340 /*75*/ 0, 1341 0, 1342 0, 1343 0, 1344 0, 1345 /*80*/ 0, 1346 0, 1347 0, 1348 #if !defined(PETSC_USE_COMPLEX) 1349 MatGetInertia_SeqSBAIJ, 1350 #else 1351 0, 1352 #endif 1353 /*85*/ MatLoad_SeqSBAIJ, 1354 MatIsSymmetric_SeqSBAIJ, 1355 MatIsStructurallySymmetric_SeqSBAIJ, 1356 MatIsHermitian_SeqSBAIJ 1357 }; 1358 1359 EXTERN_C_BEGIN 1360 #undef __FUNCT__ 1361 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1362 PetscErrorCode MatStoreValues_SeqSBAIJ(Mat mat) 1363 { 1364 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1365 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1366 PetscErrorCode ierr; 1367 1368 PetscFunctionBegin; 1369 if (aij->nonew != 1) { 1370 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1371 } 1372 1373 /* allocate space for values if not already there */ 1374 if (!aij->saved_values) { 1375 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1376 } 1377 1378 /* copy values over */ 1379 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1380 PetscFunctionReturn(0); 1381 } 1382 EXTERN_C_END 1383 1384 EXTERN_C_BEGIN 1385 #undef __FUNCT__ 1386 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1387 PetscErrorCode MatRetrieveValues_SeqSBAIJ(Mat mat) 1388 { 1389 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1390 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1391 1392 PetscFunctionBegin; 1393 if (aij->nonew != 1) { 1394 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1395 } 1396 if (!aij->saved_values) { 1397 SETERRQ(1,"Must call MatStoreValues(A);first"); 1398 } 1399 1400 /* copy values over */ 1401 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1402 PetscFunctionReturn(0); 1403 } 1404 EXTERN_C_END 1405 1406 EXTERN_C_BEGIN 1407 #undef __FUNCT__ 1408 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1409 PetscErrorCode MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,int bs,int nz,int *nnz) 1410 { 1411 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1412 int i,len,ierr,mbs,bs2; 1413 PetscTruth flg; 1414 1415 PetscFunctionBegin; 1416 B->preallocated = PETSC_TRUE; 1417 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1418 mbs = B->m/bs; 1419 bs2 = bs*bs; 1420 1421 if (mbs*bs != B->m) { 1422 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1423 } 1424 1425 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1426 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1427 if (nnz) { 1428 for (i=0; i<mbs; i++) { 1429 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1430 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs); 1431 } 1432 } 1433 1434 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1435 if (!flg) { 1436 switch (bs) { 1437 case 1: 1438 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1439 B->ops->solve = MatSolve_SeqSBAIJ_1; 1440 B->ops->solves = MatSolves_SeqSBAIJ_1; 1441 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_1; 1442 B->ops->mult = MatMult_SeqSBAIJ_1; 1443 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1444 break; 1445 case 2: 1446 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1447 B->ops->solve = MatSolve_SeqSBAIJ_2; 1448 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_2; 1449 B->ops->mult = MatMult_SeqSBAIJ_2; 1450 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1451 break; 1452 case 3: 1453 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1454 B->ops->solve = MatSolve_SeqSBAIJ_3; 1455 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_3; 1456 B->ops->mult = MatMult_SeqSBAIJ_3; 1457 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1458 break; 1459 case 4: 1460 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1461 B->ops->solve = MatSolve_SeqSBAIJ_4; 1462 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_4; 1463 B->ops->mult = MatMult_SeqSBAIJ_4; 1464 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1465 break; 1466 case 5: 1467 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1468 B->ops->solve = MatSolve_SeqSBAIJ_5; 1469 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_5; 1470 B->ops->mult = MatMult_SeqSBAIJ_5; 1471 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1472 break; 1473 case 6: 1474 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1475 B->ops->solve = MatSolve_SeqSBAIJ_6; 1476 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_6; 1477 B->ops->mult = MatMult_SeqSBAIJ_6; 1478 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1479 break; 1480 case 7: 1481 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1482 B->ops->solve = MatSolve_SeqSBAIJ_7; 1483 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_7; 1484 B->ops->mult = MatMult_SeqSBAIJ_7; 1485 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1486 break; 1487 } 1488 } 1489 1490 b->mbs = mbs; 1491 b->nbs = mbs; 1492 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1493 if (!nnz) { 1494 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1495 else if (nz <= 0) nz = 1; 1496 for (i=0; i<mbs; i++) { 1497 b->imax[i] = nz; 1498 } 1499 nz = nz*mbs; /* total nz */ 1500 } else { 1501 nz = 0; 1502 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1503 } 1504 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1505 1506 /* allocate the matrix space */ 1507 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1508 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1509 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1510 b->j = (int*)(b->a + nz*bs2); 1511 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 1512 b->i = b->j + nz; 1513 b->singlemalloc = PETSC_TRUE; 1514 1515 /* pointer to beginning of each row */ 1516 b->i[0] = 0; 1517 for (i=1; i<mbs+1; i++) { 1518 b->i[i] = b->i[i-1] + b->imax[i-1]; 1519 } 1520 1521 /* b->ilen will count nonzeros in each block row so far. */ 1522 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1523 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1524 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1525 1526 b->bs = bs; 1527 b->bs2 = bs2; 1528 b->nz = 0; 1529 b->maxnz = nz*bs2; 1530 1531 b->inew = 0; 1532 b->jnew = 0; 1533 b->anew = 0; 1534 b->a2anew = 0; 1535 b->permute = PETSC_FALSE; 1536 PetscFunctionReturn(0); 1537 } 1538 EXTERN_C_END 1539 1540 EXTERN_C_BEGIN 1541 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqAIJ(Mat,const MatType,Mat*); 1542 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqBAIJ(Mat,const MatType,Mat*); 1543 EXTERN_C_END 1544 1545 /*MC 1546 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1547 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1548 1549 Options Database Keys: 1550 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1551 1552 Level: beginner 1553 1554 .seealso: MatCreateSeqSBAIJ 1555 M*/ 1556 1557 EXTERN_C_BEGIN 1558 #undef __FUNCT__ 1559 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1560 PetscErrorCode MatCreate_SeqSBAIJ(Mat B) 1561 { 1562 Mat_SeqSBAIJ *b; 1563 PetscErrorCode ierr; 1564 int size; 1565 1566 PetscFunctionBegin; 1567 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1568 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1569 B->m = B->M = PetscMax(B->m,B->M); 1570 B->n = B->N = PetscMax(B->n,B->N); 1571 1572 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1573 B->data = (void*)b; 1574 ierr = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1575 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1576 B->ops->destroy = MatDestroy_SeqSBAIJ; 1577 B->ops->view = MatView_SeqSBAIJ; 1578 B->factor = 0; 1579 B->lupivotthreshold = 1.0; 1580 B->mapping = 0; 1581 b->row = 0; 1582 b->icol = 0; 1583 b->reallocs = 0; 1584 b->saved_values = 0; 1585 1586 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1587 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1588 1589 b->sorted = PETSC_FALSE; 1590 b->roworiented = PETSC_TRUE; 1591 b->nonew = 0; 1592 b->diag = 0; 1593 b->solve_work = 0; 1594 b->mult_work = 0; 1595 B->spptr = 0; 1596 b->keepzeroedrows = PETSC_FALSE; 1597 b->xtoy = 0; 1598 b->XtoY = 0; 1599 1600 b->inew = 0; 1601 b->jnew = 0; 1602 b->anew = 0; 1603 b->a2anew = 0; 1604 b->permute = PETSC_FALSE; 1605 1606 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1607 "MatStoreValues_SeqSBAIJ", 1608 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1609 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1610 "MatRetrieveValues_SeqSBAIJ", 1611 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1612 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1613 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1614 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1615 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1616 "MatConvert_SeqSBAIJ_SeqAIJ", 1617 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1618 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 1619 "MatConvert_SeqSBAIJ_SeqBAIJ", 1620 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 1621 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1622 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1623 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1624 1625 B->symmetric = PETSC_TRUE; 1626 B->structurally_symmetric = PETSC_TRUE; 1627 B->symmetric_set = PETSC_TRUE; 1628 B->structurally_symmetric_set = PETSC_TRUE; 1629 PetscFunctionReturn(0); 1630 } 1631 EXTERN_C_END 1632 1633 #undef __FUNCT__ 1634 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1635 /*@C 1636 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1637 compressed row) format. For good matrix assembly performance the 1638 user should preallocate the matrix storage by setting the parameter nz 1639 (or the array nnz). By setting these parameters accurately, performance 1640 during matrix assembly can be increased by more than a factor of 50. 1641 1642 Collective on Mat 1643 1644 Input Parameters: 1645 + A - the symmetric matrix 1646 . bs - size of block 1647 . nz - number of block nonzeros per block row (same for all rows) 1648 - nnz - array containing the number of block nonzeros in the upper triangular plus 1649 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1650 1651 Options Database Keys: 1652 . -mat_no_unroll - uses code that does not unroll the loops in the 1653 block calculations (much slower) 1654 . -mat_block_size - size of the blocks to use 1655 1656 Level: intermediate 1657 1658 Notes: 1659 Specify the preallocated storage with either nz or nnz (not both). 1660 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1661 allocation. For additional details, see the users manual chapter on 1662 matrices. 1663 1664 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1665 @*/ 1666 PetscErrorCode MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) { 1667 PetscErrorCode ierr,(*f)(Mat,int,int,const int[]); 1668 1669 PetscFunctionBegin; 1670 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1671 if (f) { 1672 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1673 } 1674 PetscFunctionReturn(0); 1675 } 1676 1677 #undef __FUNCT__ 1678 #define __FUNCT__ "MatCreateSeqSBAIJ" 1679 /*@C 1680 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1681 compressed row) format. For good matrix assembly performance the 1682 user should preallocate the matrix storage by setting the parameter nz 1683 (or the array nnz). By setting these parameters accurately, performance 1684 during matrix assembly can be increased by more than a factor of 50. 1685 1686 Collective on MPI_Comm 1687 1688 Input Parameters: 1689 + comm - MPI communicator, set to PETSC_COMM_SELF 1690 . bs - size of block 1691 . m - number of rows, or number of columns 1692 . nz - number of block nonzeros per block row (same for all rows) 1693 - nnz - array containing the number of block nonzeros in the upper triangular plus 1694 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1695 1696 Output Parameter: 1697 . A - the symmetric matrix 1698 1699 Options Database Keys: 1700 . -mat_no_unroll - uses code that does not unroll the loops in the 1701 block calculations (much slower) 1702 . -mat_block_size - size of the blocks to use 1703 1704 Level: intermediate 1705 1706 Notes: 1707 1708 Specify the preallocated storage with either nz or nnz (not both). 1709 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1710 allocation. For additional details, see the users manual chapter on 1711 matrices. 1712 1713 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1714 @*/ 1715 PetscErrorCode MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A) 1716 { 1717 PetscErrorCode ierr; 1718 1719 PetscFunctionBegin; 1720 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1721 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1722 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1723 PetscFunctionReturn(0); 1724 } 1725 1726 #undef __FUNCT__ 1727 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1728 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1729 { 1730 Mat C; 1731 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1732 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1733 1734 PetscFunctionBegin; 1735 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1736 1737 *B = 0; 1738 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1739 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1740 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1741 c = (Mat_SeqSBAIJ*)C->data; 1742 1743 C->preallocated = PETSC_TRUE; 1744 C->factor = A->factor; 1745 c->row = 0; 1746 c->icol = 0; 1747 c->saved_values = 0; 1748 c->keepzeroedrows = a->keepzeroedrows; 1749 C->assembled = PETSC_TRUE; 1750 1751 C->M = A->M; 1752 C->N = A->N; 1753 c->bs = a->bs; 1754 c->bs2 = a->bs2; 1755 c->mbs = a->mbs; 1756 c->nbs = a->nbs; 1757 1758 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1759 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1760 for (i=0; i<mbs; i++) { 1761 c->imax[i] = a->imax[i]; 1762 c->ilen[i] = a->ilen[i]; 1763 } 1764 1765 /* allocate the matrix space */ 1766 c->singlemalloc = PETSC_TRUE; 1767 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1768 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1769 c->j = (int*)(c->a + nz*bs2); 1770 c->i = c->j + nz; 1771 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1772 if (mbs > 0) { 1773 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1774 if (cpvalues == MAT_COPY_VALUES) { 1775 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1776 } else { 1777 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1778 } 1779 } 1780 1781 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1782 c->sorted = a->sorted; 1783 c->roworiented = a->roworiented; 1784 c->nonew = a->nonew; 1785 1786 if (a->diag) { 1787 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1788 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1789 for (i=0; i<mbs; i++) { 1790 c->diag[i] = a->diag[i]; 1791 } 1792 } else c->diag = 0; 1793 c->nz = a->nz; 1794 c->maxnz = a->maxnz; 1795 c->solve_work = 0; 1796 c->mult_work = 0; 1797 *B = C; 1798 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1799 PetscFunctionReturn(0); 1800 } 1801 1802 #undef __FUNCT__ 1803 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1804 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer,const MatType type,Mat *A) 1805 { 1806 Mat_SeqSBAIJ *a; 1807 Mat B; 1808 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1809 int *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1810 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1811 int *masked,nmask,tmp,bs2,ishift; 1812 PetscScalar *aa; 1813 MPI_Comm comm = ((PetscObject)viewer)->comm; 1814 1815 PetscFunctionBegin; 1816 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1817 bs2 = bs*bs; 1818 1819 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1820 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1821 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1822 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1823 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1824 M = header[1]; N = header[2]; nz = header[3]; 1825 1826 if (header[3] < 0) { 1827 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1828 } 1829 1830 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1831 1832 /* 1833 This code adds extra rows to make sure the number of rows is 1834 divisible by the blocksize 1835 */ 1836 mbs = M/bs; 1837 extra_rows = bs - M + bs*(mbs); 1838 if (extra_rows == bs) extra_rows = 0; 1839 else mbs++; 1840 if (extra_rows) { 1841 PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"); 1842 } 1843 1844 /* read in row lengths */ 1845 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1846 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1847 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1848 1849 /* read in column indices */ 1850 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1851 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1852 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1853 1854 /* loop over row lengths determining block row lengths */ 1855 ierr = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr); 1856 ierr = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1857 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1858 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1859 masked = mask + mbs; 1860 rowcount = 0; nzcount = 0; 1861 for (i=0; i<mbs; i++) { 1862 nmask = 0; 1863 for (j=0; j<bs; j++) { 1864 kmax = rowlengths[rowcount]; 1865 for (k=0; k<kmax; k++) { 1866 tmp = jj[nzcount++]/bs; /* block col. index */ 1867 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1868 } 1869 rowcount++; 1870 } 1871 s_browlengths[i] += nmask; 1872 1873 /* zero out the mask elements we set */ 1874 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1875 } 1876 1877 /* create our matrix */ 1878 ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr); 1879 ierr = MatSetType(B,type);CHKERRQ(ierr); 1880 ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr); 1881 a = (Mat_SeqSBAIJ*)B->data; 1882 1883 /* set matrix "i" values */ 1884 a->i[0] = 0; 1885 for (i=1; i<= mbs; i++) { 1886 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1887 a->ilen[i-1] = s_browlengths[i-1]; 1888 } 1889 a->nz = a->i[mbs]; 1890 1891 /* read in nonzero values */ 1892 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1893 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1894 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1895 1896 /* set "a" and "j" values into matrix */ 1897 nzcount = 0; jcount = 0; 1898 for (i=0; i<mbs; i++) { 1899 nzcountb = nzcount; 1900 nmask = 0; 1901 for (j=0; j<bs; j++) { 1902 kmax = rowlengths[i*bs+j]; 1903 for (k=0; k<kmax; k++) { 1904 tmp = jj[nzcount++]/bs; /* block col. index */ 1905 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1906 } 1907 } 1908 /* sort the masked values */ 1909 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1910 1911 /* set "j" values into matrix */ 1912 maskcount = 1; 1913 for (j=0; j<nmask; j++) { 1914 a->j[jcount++] = masked[j]; 1915 mask[masked[j]] = maskcount++; 1916 } 1917 1918 /* set "a" values into matrix */ 1919 ishift = bs2*a->i[i]; 1920 for (j=0; j<bs; j++) { 1921 kmax = rowlengths[i*bs+j]; 1922 for (k=0; k<kmax; k++) { 1923 tmp = jj[nzcountb]/bs ; /* block col. index */ 1924 if (tmp >= i){ 1925 block = mask[tmp] - 1; 1926 point = jj[nzcountb] - bs*tmp; 1927 idx = ishift + bs2*block + j + bs*point; 1928 a->a[idx] = aa[nzcountb]; 1929 } 1930 nzcountb++; 1931 } 1932 } 1933 /* zero out the mask elements we set */ 1934 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1935 } 1936 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1937 1938 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1939 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1940 ierr = PetscFree(aa);CHKERRQ(ierr); 1941 ierr = PetscFree(jj);CHKERRQ(ierr); 1942 ierr = PetscFree(mask);CHKERRQ(ierr); 1943 1944 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1945 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1946 ierr = MatView_Private(B);CHKERRQ(ierr); 1947 *A = B; 1948 PetscFunctionReturn(0); 1949 } 1950 1951 #undef __FUNCT__ 1952 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1953 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1954 { 1955 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1956 MatScalar *aa=a->a,*v,*v1; 1957 PetscScalar *x,*b,*t,sum,d; 1958 int m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr; 1959 int nz,nz1,*vj,*vj1,i; 1960 1961 PetscFunctionBegin; 1962 its = its*lits; 1963 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1964 1965 if (bs > 1) 1966 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1967 1968 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1969 if (xx != bb) { 1970 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1971 } else { 1972 b = x; 1973 } 1974 1975 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1976 1977 if (flag & SOR_ZERO_INITIAL_GUESS) { 1978 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1979 for (i=0; i<m; i++) 1980 t[i] = b[i]; 1981 1982 for (i=0; i<m; i++){ 1983 d = *(aa + ai[i]); /* diag[i] */ 1984 v = aa + ai[i] + 1; 1985 vj = aj + ai[i] + 1; 1986 nz = ai[i+1] - ai[i] - 1; 1987 x[i] = omega*t[i]/d; 1988 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 1989 PetscLogFlops(2*nz-1); 1990 } 1991 } 1992 1993 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1994 for (i=0; i<m; i++) 1995 t[i] = b[i]; 1996 1997 for (i=0; i<m-1; i++){ /* update rhs */ 1998 v = aa + ai[i] + 1; 1999 vj = aj + ai[i] + 1; 2000 nz = ai[i+1] - ai[i] - 1; 2001 while (nz--) t[*vj++] -= x[i]*(*v++); 2002 PetscLogFlops(2*nz-1); 2003 } 2004 for (i=m-1; i>=0; i--){ 2005 d = *(aa + ai[i]); 2006 v = aa + ai[i] + 1; 2007 vj = aj + ai[i] + 1; 2008 nz = ai[i+1] - ai[i] - 1; 2009 sum = t[i]; 2010 while (nz--) sum -= x[*vj++]*(*v++); 2011 PetscLogFlops(2*nz-1); 2012 x[i] = (1-omega)*x[i] + omega*sum/d; 2013 } 2014 } 2015 its--; 2016 } 2017 2018 while (its--) { 2019 /* 2020 forward sweep: 2021 for i=0,...,m-1: 2022 sum[i] = (b[i] - U(i,:)x )/d[i]; 2023 x[i] = (1-omega)x[i] + omega*sum[i]; 2024 b = b - x[i]*U^T(i,:); 2025 2026 */ 2027 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2028 for (i=0; i<m; i++) 2029 t[i] = b[i]; 2030 2031 for (i=0; i<m; i++){ 2032 d = *(aa + ai[i]); /* diag[i] */ 2033 v = aa + ai[i] + 1; v1=v; 2034 vj = aj + ai[i] + 1; vj1=vj; 2035 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2036 sum = t[i]; 2037 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2038 x[i] = (1-omega)*x[i] + omega*sum/d; 2039 while (nz--) t[*vj++] -= x[i]*(*v++); 2040 PetscLogFlops(4*nz-2); 2041 } 2042 } 2043 2044 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2045 /* 2046 backward sweep: 2047 b = b - x[i]*U^T(i,:), i=0,...,n-2 2048 for i=m-1,...,0: 2049 sum[i] = (b[i] - U(i,:)x )/d[i]; 2050 x[i] = (1-omega)x[i] + omega*sum[i]; 2051 */ 2052 for (i=0; i<m; i++) 2053 t[i] = b[i]; 2054 2055 for (i=0; i<m-1; i++){ /* update rhs */ 2056 v = aa + ai[i] + 1; 2057 vj = aj + ai[i] + 1; 2058 nz = ai[i+1] - ai[i] - 1; 2059 while (nz--) t[*vj++] -= x[i]*(*v++); 2060 PetscLogFlops(2*nz-1); 2061 } 2062 for (i=m-1; i>=0; i--){ 2063 d = *(aa + ai[i]); 2064 v = aa + ai[i] + 1; 2065 vj = aj + ai[i] + 1; 2066 nz = ai[i+1] - ai[i] - 1; 2067 sum = t[i]; 2068 while (nz--) sum -= x[*vj++]*(*v++); 2069 PetscLogFlops(2*nz-1); 2070 x[i] = (1-omega)*x[i] + omega*sum/d; 2071 } 2072 } 2073 } 2074 2075 ierr = PetscFree(t);CHKERRQ(ierr); 2076 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2077 if (bb != xx) { 2078 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2079 } 2080 2081 PetscFunctionReturn(0); 2082 } 2083 2084 2085 2086 2087 2088 2089