1 2 /* 3 Defines the basic matrix operations for the SBAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 7 #include "src/inline/spops.h" 8 #include "src/mat/impls/sbaij/seq/sbaij.h" 9 10 #define CHUNKSIZE 10 11 12 /* 13 Checks for missing diagonals 14 */ 15 #undef __FUNCT__ 16 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 17 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A) 18 { 19 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 20 PetscErrorCode ierr; 21 int *diag,*jj = a->j,i; 22 23 PetscFunctionBegin; 24 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 25 diag = a->diag; 26 for (i=0; i<a->mbs; i++) { 27 if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i); 28 } 29 PetscFunctionReturn(0); 30 } 31 32 #undef __FUNCT__ 33 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 34 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A) 35 { 36 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 37 PetscErrorCode ierr; 38 int i,mbs = a->mbs; 39 40 PetscFunctionBegin; 41 if (a->diag) PetscFunctionReturn(0); 42 43 ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr); 44 PetscLogObjectMemory(A,(mbs+1)*sizeof(int)); 45 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 #undef __FUNCT__ 50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 51 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 52 { 53 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 54 int n = a->mbs,i; 55 56 PetscFunctionBegin; 57 *nn = n; 58 if (!ia) PetscFunctionReturn(0); 59 60 if (oshift == 1) { 61 /* temporarily add 1 to i and j indices */ 62 int nz = a->i[n]; 63 for (i=0; i<nz; i++) a->j[i]++; 64 for (i=0; i<n+1; i++) a->i[i]++; 65 *ia = a->i; *ja = a->j; 66 } else { 67 *ia = a->i; *ja = a->j; 68 } 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 74 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 75 { 76 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 77 int i,n = a->mbs; 78 79 PetscFunctionBegin; 80 if (!ia) PetscFunctionReturn(0); 81 82 if (oshift == 1) { 83 int nz = a->i[n]-1; 84 for (i=0; i<nz; i++) a->j[i]--; 85 for (i=0; i<n+1; i++) a->i[i]--; 86 } 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ" 92 PetscErrorCode MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs) 93 { 94 Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data; 95 96 PetscFunctionBegin; 97 *bs = sbaij->bs; 98 PetscFunctionReturn(0); 99 } 100 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 103 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A) 104 { 105 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 106 PetscErrorCode ierr; 107 108 PetscFunctionBegin; 109 PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->m,a->nz); 110 ierr = PetscFree(a->a);CHKERRQ(ierr); 111 if (!a->singlemalloc) { 112 ierr = PetscFree(a->i);CHKERRQ(ierr); 113 ierr = PetscFree(a->j);CHKERRQ(ierr); 114 } 115 if (a->row) { 116 ierr = ISDestroy(a->row);CHKERRQ(ierr); 117 } 118 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 119 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 120 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 121 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 122 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 123 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 124 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 125 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 126 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 127 128 if (a->inew){ 129 ierr = PetscFree(a->inew);CHKERRQ(ierr); 130 a->inew = 0; 131 } 132 ierr = PetscFree(a);CHKERRQ(ierr); 133 134 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 135 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 136 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 137 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 138 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr); 139 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 140 PetscFunctionReturn(0); 141 } 142 143 #undef __FUNCT__ 144 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 145 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op) 146 { 147 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 148 149 PetscFunctionBegin; 150 switch (op) { 151 case MAT_ROW_ORIENTED: 152 a->roworiented = PETSC_TRUE; 153 break; 154 case MAT_COLUMN_ORIENTED: 155 a->roworiented = PETSC_FALSE; 156 break; 157 case MAT_COLUMNS_SORTED: 158 a->sorted = PETSC_TRUE; 159 break; 160 case MAT_COLUMNS_UNSORTED: 161 a->sorted = PETSC_FALSE; 162 break; 163 case MAT_KEEP_ZEROED_ROWS: 164 a->keepzeroedrows = PETSC_TRUE; 165 break; 166 case MAT_NO_NEW_NONZERO_LOCATIONS: 167 a->nonew = 1; 168 break; 169 case MAT_NEW_NONZERO_LOCATION_ERR: 170 a->nonew = -1; 171 break; 172 case MAT_NEW_NONZERO_ALLOCATION_ERR: 173 a->nonew = -2; 174 break; 175 case MAT_YES_NEW_NONZERO_LOCATIONS: 176 a->nonew = 0; 177 break; 178 case MAT_ROWS_SORTED: 179 case MAT_ROWS_UNSORTED: 180 case MAT_YES_NEW_DIAGONALS: 181 case MAT_IGNORE_OFF_PROC_ENTRIES: 182 case MAT_USE_HASH_TABLE: 183 PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n"); 184 break; 185 case MAT_NO_NEW_DIAGONALS: 186 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 187 case MAT_NOT_SYMMETRIC: 188 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 189 case MAT_HERMITIAN: 190 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 191 case MAT_SYMMETRIC: 192 case MAT_STRUCTURALLY_SYMMETRIC: 193 case MAT_NOT_HERMITIAN: 194 case MAT_SYMMETRY_ETERNAL: 195 case MAT_NOT_SYMMETRY_ETERNAL: 196 break; 197 default: 198 SETERRQ(PETSC_ERR_SUP,"unknown option"); 199 } 200 PetscFunctionReturn(0); 201 } 202 203 #undef __FUNCT__ 204 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 205 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v) 206 { 207 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 208 PetscErrorCode ierr; 209 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 210 MatScalar *aa,*aa_i; 211 PetscScalar *v_i; 212 213 PetscFunctionBegin; 214 bs = a->bs; 215 ai = a->i; 216 aj = a->j; 217 aa = a->a; 218 bs2 = a->bs2; 219 220 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row); 221 222 bn = row/bs; /* Block number */ 223 bp = row % bs; /* Block position */ 224 M = ai[bn+1] - ai[bn]; 225 *ncols = bs*M; 226 227 if (v) { 228 *v = 0; 229 if (*ncols) { 230 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 231 for (i=0; i<M; i++) { /* for each block in the block row */ 232 v_i = *v + i*bs; 233 aa_i = aa + bs2*(ai[bn] + i); 234 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 235 } 236 } 237 } 238 239 if (cols) { 240 *cols = 0; 241 if (*ncols) { 242 ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr); 243 for (i=0; i<M; i++) { /* for each block in the block row */ 244 cols_i = *cols + i*bs; 245 itmp = bs*aj[ai[bn] + i]; 246 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 247 } 248 } 249 } 250 251 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 252 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 253 #ifdef column_search 254 v_i = *v + M*bs; 255 cols_i = *cols + M*bs; 256 for (i=0; i<bn; i++){ /* for each block row */ 257 M = ai[i+1] - ai[i]; 258 for (j=0; j<M; j++){ 259 itmp = aj[ai[i] + j]; /* block column value */ 260 if (itmp == bn){ 261 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 262 for (k=0; k<bs; k++) { 263 *cols_i++ = i*bs+k; 264 *v_i++ = aa_i[k]; 265 } 266 *ncols += bs; 267 break; 268 } 269 } 270 } 271 #endif 272 273 PetscFunctionReturn(0); 274 } 275 276 #undef __FUNCT__ 277 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 278 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 279 { 280 PetscErrorCode ierr; 281 282 PetscFunctionBegin; 283 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 284 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 285 PetscFunctionReturn(0); 286 } 287 288 #undef __FUNCT__ 289 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 290 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B) 291 { 292 PetscErrorCode ierr; 293 PetscFunctionBegin; 294 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 295 PetscFunctionReturn(0); 296 } 297 298 #undef __FUNCT__ 299 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 300 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 301 { 302 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 303 PetscErrorCode ierr; 304 int i,j,bs = a->bs,k,l,bs2=a->bs2; 305 char *name; 306 PetscViewerFormat format; 307 308 PetscFunctionBegin; 309 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 310 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 311 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 312 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 313 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 314 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 315 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 316 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 317 for (i=0; i<a->mbs; i++) { 318 for (j=0; j<bs; j++) { 319 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 320 for (k=a->i[i]; k<a->i[i+1]; k++) { 321 for (l=0; l<bs; l++) { 322 #if defined(PETSC_USE_COMPLEX) 323 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 324 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 325 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 326 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 327 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 328 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 329 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 330 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 331 } 332 #else 333 if (a->a[bs2*k + l*bs + j] != 0.0) { 334 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 335 } 336 #endif 337 } 338 } 339 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 340 } 341 } 342 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 343 } else { 344 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 345 for (i=0; i<a->mbs; i++) { 346 for (j=0; j<bs; j++) { 347 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 348 for (k=a->i[i]; k<a->i[i+1]; k++) { 349 for (l=0; l<bs; l++) { 350 #if defined(PETSC_USE_COMPLEX) 351 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 352 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 353 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 354 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 355 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 356 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 357 } else { 358 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 359 } 360 #else 361 ierr = PetscViewerASCIIPrintf(viewer," %D %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 362 #endif 363 } 364 } 365 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 366 } 367 } 368 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 369 } 370 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 371 PetscFunctionReturn(0); 372 } 373 374 #undef __FUNCT__ 375 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 376 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 377 { 378 Mat A = (Mat) Aa; 379 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 380 PetscErrorCode ierr; 381 int row,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 382 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 383 MatScalar *aa; 384 MPI_Comm comm; 385 PetscViewer viewer; 386 387 PetscFunctionBegin; 388 /* 389 This is nasty. If this is called from an originally parallel matrix 390 then all processes call this,but only the first has the matrix so the 391 rest should return immediately. 392 */ 393 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 394 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 395 if (rank) PetscFunctionReturn(0); 396 397 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 398 399 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 400 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 401 402 /* loop over matrix elements drawing boxes */ 403 color = PETSC_DRAW_BLUE; 404 for (i=0,row=0; i<mbs; i++,row+=bs) { 405 for (j=a->i[i]; j<a->i[i+1]; j++) { 406 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 407 x_l = a->j[j]*bs; x_r = x_l + 1.0; 408 aa = a->a + j*bs2; 409 for (k=0; k<bs; k++) { 410 for (l=0; l<bs; l++) { 411 if (PetscRealPart(*aa++) >= 0.) continue; 412 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 413 } 414 } 415 } 416 } 417 color = PETSC_DRAW_CYAN; 418 for (i=0,row=0; i<mbs; i++,row+=bs) { 419 for (j=a->i[i]; j<a->i[i+1]; j++) { 420 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 421 x_l = a->j[j]*bs; x_r = x_l + 1.0; 422 aa = a->a + j*bs2; 423 for (k=0; k<bs; k++) { 424 for (l=0; l<bs; l++) { 425 if (PetscRealPart(*aa++) != 0.) continue; 426 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 427 } 428 } 429 } 430 } 431 432 color = PETSC_DRAW_RED; 433 for (i=0,row=0; i<mbs; i++,row+=bs) { 434 for (j=a->i[i]; j<a->i[i+1]; j++) { 435 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 436 x_l = a->j[j]*bs; x_r = x_l + 1.0; 437 aa = a->a + j*bs2; 438 for (k=0; k<bs; k++) { 439 for (l=0; l<bs; l++) { 440 if (PetscRealPart(*aa++) <= 0.) continue; 441 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 442 } 443 } 444 } 445 } 446 PetscFunctionReturn(0); 447 } 448 449 #undef __FUNCT__ 450 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 451 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 452 { 453 PetscErrorCode ierr; 454 PetscReal xl,yl,xr,yr,w,h; 455 PetscDraw draw; 456 PetscTruth isnull; 457 458 PetscFunctionBegin; 459 460 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 461 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 462 463 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 464 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 465 xr += w; yr += h; xl = -w; yl = -h; 466 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 467 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 468 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 469 PetscFunctionReturn(0); 470 } 471 472 #undef __FUNCT__ 473 #define __FUNCT__ "MatView_SeqSBAIJ" 474 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 475 { 476 PetscErrorCode ierr; 477 PetscTruth iascii,isdraw; 478 479 PetscFunctionBegin; 480 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 481 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 482 if (iascii){ 483 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 484 } else if (isdraw) { 485 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 486 } else { 487 Mat B; 488 ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr); 489 ierr = MatView(B,viewer);CHKERRQ(ierr); 490 ierr = MatDestroy(B);CHKERRQ(ierr); 491 } 492 PetscFunctionReturn(0); 493 } 494 495 496 #undef __FUNCT__ 497 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 498 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[]) 499 { 500 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 501 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 502 int *ai = a->i,*ailen = a->ilen; 503 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 504 MatScalar *ap,*aa = a->a,zero = 0.0; 505 506 PetscFunctionBegin; 507 for (k=0; k<m; k++) { /* loop over rows */ 508 row = im[k]; brow = row/bs; 509 if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); 510 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 511 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 512 nrow = ailen[brow]; 513 for (l=0; l<n; l++) { /* loop over columns */ 514 if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); 515 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1); 516 col = in[l] ; 517 bcol = col/bs; 518 cidx = col%bs; 519 ridx = row%bs; 520 high = nrow; 521 low = 0; /* assume unsorted */ 522 while (high-low > 5) { 523 t = (low+high)/2; 524 if (rp[t] > bcol) high = t; 525 else low = t; 526 } 527 for (i=low; i<high; i++) { 528 if (rp[i] > bcol) break; 529 if (rp[i] == bcol) { 530 *v++ = ap[bs2*i+bs*cidx+ridx]; 531 goto finished; 532 } 533 } 534 *v++ = zero; 535 finished:; 536 } 537 } 538 PetscFunctionReturn(0); 539 } 540 541 542 #undef __FUNCT__ 543 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 544 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 545 { 546 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 547 PetscErrorCode ierr; 548 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 549 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 550 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval; 551 PetscTruth roworiented=a->roworiented; 552 const MatScalar *value = v; 553 MatScalar *ap,*aa = a->a,*bap; 554 555 PetscFunctionBegin; 556 if (roworiented) { 557 stepval = (n-1)*bs; 558 } else { 559 stepval = (m-1)*bs; 560 } 561 for (k=0; k<m; k++) { /* loop over added rows */ 562 row = im[k]; 563 if (row < 0) continue; 564 #if defined(PETSC_USE_BOPT_g) 565 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 566 #endif 567 rp = aj + ai[row]; 568 ap = aa + bs2*ai[row]; 569 rmax = imax[row]; 570 nrow = ailen[row]; 571 low = 0; 572 for (l=0; l<n; l++) { /* loop over added columns */ 573 if (in[l] < 0) continue; 574 col = in[l]; 575 #if defined(PETSC_USE_BOPT_g) 576 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1); 577 #endif 578 if (col < row) continue; /* ignore lower triangular block */ 579 if (roworiented) { 580 value = v + k*(stepval+bs)*bs + l*bs; 581 } else { 582 value = v + l*(stepval+bs)*bs + k*bs; 583 } 584 if (!sorted) low = 0; high = nrow; 585 while (high-low > 7) { 586 t = (low+high)/2; 587 if (rp[t] > col) high = t; 588 else low = t; 589 } 590 for (i=low; i<high; i++) { 591 if (rp[i] > col) break; 592 if (rp[i] == col) { 593 bap = ap + bs2*i; 594 if (roworiented) { 595 if (is == ADD_VALUES) { 596 for (ii=0; ii<bs; ii++,value+=stepval) { 597 for (jj=ii; jj<bs2; jj+=bs) { 598 bap[jj] += *value++; 599 } 600 } 601 } else { 602 for (ii=0; ii<bs; ii++,value+=stepval) { 603 for (jj=ii; jj<bs2; jj+=bs) { 604 bap[jj] = *value++; 605 } 606 } 607 } 608 } else { 609 if (is == ADD_VALUES) { 610 for (ii=0; ii<bs; ii++,value+=stepval) { 611 for (jj=0; jj<bs; jj++) { 612 *bap++ += *value++; 613 } 614 } 615 } else { 616 for (ii=0; ii<bs; ii++,value+=stepval) { 617 for (jj=0; jj<bs; jj++) { 618 *bap++ = *value++; 619 } 620 } 621 } 622 } 623 goto noinsert2; 624 } 625 } 626 if (nonew == 1) goto noinsert2; 627 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 628 if (nrow >= rmax) { 629 /* there is no extra room in row, therefore enlarge */ 630 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 631 MatScalar *new_a; 632 633 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 634 635 /* malloc new storage space */ 636 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 637 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 638 new_j = (int*)(new_a + bs2*new_nz); 639 new_i = new_j + new_nz; 640 641 /* copy over old data into new slots */ 642 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 643 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 644 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 645 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 646 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 647 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 648 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 649 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 650 /* free up old matrix storage */ 651 ierr = PetscFree(a->a);CHKERRQ(ierr); 652 if (!a->singlemalloc) { 653 ierr = PetscFree(a->i);CHKERRQ(ierr); 654 ierr = PetscFree(a->j);CHKERRQ(ierr); 655 } 656 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 657 a->singlemalloc = PETSC_TRUE; 658 659 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 660 rmax = imax[row] = imax[row] + CHUNKSIZE; 661 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 662 a->maxnz += bs2*CHUNKSIZE; 663 a->reallocs++; 664 a->nz++; 665 } 666 N = nrow++ - 1; 667 /* shift up all the later entries in this row */ 668 for (ii=N; ii>=i; ii--) { 669 rp[ii+1] = rp[ii]; 670 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 671 } 672 if (N >= i) { 673 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 674 } 675 rp[i] = col; 676 bap = ap + bs2*i; 677 if (roworiented) { 678 for (ii=0; ii<bs; ii++,value+=stepval) { 679 for (jj=ii; jj<bs2; jj+=bs) { 680 bap[jj] = *value++; 681 } 682 } 683 } else { 684 for (ii=0; ii<bs; ii++,value+=stepval) { 685 for (jj=0; jj<bs; jj++) { 686 *bap++ = *value++; 687 } 688 } 689 } 690 noinsert2:; 691 low = i; 692 } 693 ailen[row] = nrow; 694 } 695 PetscFunctionReturn(0); 696 } 697 698 #undef __FUNCT__ 699 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 700 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 701 { 702 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 703 PetscErrorCode ierr; 704 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 705 int m = A->m,*ip,N,*ailen = a->ilen; 706 int mbs = a->mbs,bs2 = a->bs2,rmax = 0; 707 MatScalar *aa = a->a,*ap; 708 709 PetscFunctionBegin; 710 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 711 712 if (m) rmax = ailen[0]; 713 for (i=1; i<mbs; i++) { 714 /* move each row back by the amount of empty slots (fshift) before it*/ 715 fshift += imax[i-1] - ailen[i-1]; 716 rmax = PetscMax(rmax,ailen[i]); 717 if (fshift) { 718 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 719 N = ailen[i]; 720 for (j=0; j<N; j++) { 721 ip[j-fshift] = ip[j]; 722 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 723 } 724 } 725 ai[i] = ai[i-1] + ailen[i-1]; 726 } 727 if (mbs) { 728 fshift += imax[mbs-1] - ailen[mbs-1]; 729 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 730 } 731 /* reset ilen and imax for each row */ 732 for (i=0; i<mbs; i++) { 733 ailen[i] = imax[i] = ai[i+1] - ai[i]; 734 } 735 a->nz = ai[mbs]; 736 737 /* diagonals may have moved, reset it */ 738 if (a->diag) { 739 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr); 740 } 741 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n", 742 m,A->m,a->bs,fshift*bs2,a->nz*bs2); 743 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %D\n", 744 a->reallocs); 745 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %D\n",rmax); 746 a->reallocs = 0; 747 A->info.nz_unneeded = (PetscReal)fshift*bs2; 748 749 PetscFunctionReturn(0); 750 } 751 752 /* 753 This function returns an array of flags which indicate the locations of contiguous 754 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 755 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 756 Assume: sizes should be long enough to hold all the values. 757 */ 758 #undef __FUNCT__ 759 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 760 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 761 { 762 int i,j,k,row; 763 PetscTruth flg; 764 765 PetscFunctionBegin; 766 for (i=0,j=0; i<n; j++) { 767 row = idx[i]; 768 if (row%bs!=0) { /* Not the begining of a block */ 769 sizes[j] = 1; 770 i++; 771 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 772 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 773 i++; 774 } else { /* Begining of the block, so check if the complete block exists */ 775 flg = PETSC_TRUE; 776 for (k=1; k<bs; k++) { 777 if (row+k != idx[i+k]) { /* break in the block */ 778 flg = PETSC_FALSE; 779 break; 780 } 781 } 782 if (flg == PETSC_TRUE) { /* No break in the bs */ 783 sizes[j] = bs; 784 i+= bs; 785 } else { 786 sizes[j] = 1; 787 i++; 788 } 789 } 790 } 791 *bs_max = j; 792 PetscFunctionReturn(0); 793 } 794 795 #undef __FUNCT__ 796 #define __FUNCT__ "MatZeroRows_SeqSBAIJ" 797 PetscErrorCode MatZeroRows_SeqSBAIJ(Mat A,IS is,const PetscScalar *diag) 798 { 799 PetscFunctionBegin; 800 SETERRQ(PETSC_ERR_SUP,"No support for this function yet"); 801 } 802 803 /* Only add/insert a(i,j) with i<=j (blocks). 804 Any a(i,j) with i>j input by user is ingored. 805 */ 806 807 #undef __FUNCT__ 808 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 809 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 810 { 811 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 812 PetscErrorCode ierr; 813 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 814 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 815 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 816 int ridx,cidx,bs2=a->bs2; 817 MatScalar *ap,value,*aa=a->a,*bap; 818 819 PetscFunctionBegin; 820 821 for (k=0; k<m; k++) { /* loop over added rows */ 822 row = im[k]; /* row number */ 823 brow = row/bs; /* block row number */ 824 if (row < 0) continue; 825 #if defined(PETSC_USE_BOPT_g) 826 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 827 #endif 828 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 829 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 830 rmax = imax[brow]; /* maximum space allocated for this row */ 831 nrow = ailen[brow]; /* actual length of this row */ 832 low = 0; 833 834 for (l=0; l<n; l++) { /* loop over added columns */ 835 if (in[l] < 0) continue; 836 #if defined(PETSC_USE_BOPT_g) 837 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->m-1); 838 #endif 839 col = in[l]; 840 bcol = col/bs; /* block col number */ 841 842 if (brow <= bcol){ 843 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 844 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 845 /* element value a(k,l) */ 846 if (roworiented) { 847 value = v[l + k*n]; 848 } else { 849 value = v[k + l*m]; 850 } 851 852 /* move pointer bap to a(k,l) quickly and add/insert value */ 853 if (!sorted) low = 0; high = nrow; 854 while (high-low > 7) { 855 t = (low+high)/2; 856 if (rp[t] > bcol) high = t; 857 else low = t; 858 } 859 for (i=low; i<high; i++) { 860 /* printf("The loop of i=low.., rp[%D]=%D\n",i,rp[i]); */ 861 if (rp[i] > bcol) break; 862 if (rp[i] == bcol) { 863 bap = ap + bs2*i + bs*cidx + ridx; 864 if (is == ADD_VALUES) *bap += value; 865 else *bap = value; 866 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 867 if (brow == bcol && ridx < cidx){ 868 bap = ap + bs2*i + bs*ridx + cidx; 869 if (is == ADD_VALUES) *bap += value; 870 else *bap = value; 871 } 872 goto noinsert1; 873 } 874 } 875 876 if (nonew == 1) goto noinsert1; 877 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 878 if (nrow >= rmax) { 879 /* there is no extra room in row, therefore enlarge */ 880 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 881 MatScalar *new_a; 882 883 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 884 885 /* Malloc new storage space */ 886 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 887 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 888 new_j = (int*)(new_a + bs2*new_nz); 889 new_i = new_j + new_nz; 890 891 /* copy over old data into new slots */ 892 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 893 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 894 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 895 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 896 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 897 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 898 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 899 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 900 /* free up old matrix storage */ 901 ierr = PetscFree(a->a);CHKERRQ(ierr); 902 if (!a->singlemalloc) { 903 ierr = PetscFree(a->i);CHKERRQ(ierr); 904 ierr = PetscFree(a->j);CHKERRQ(ierr); 905 } 906 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 907 a->singlemalloc = PETSC_TRUE; 908 909 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 910 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 911 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 912 a->maxnz += bs2*CHUNKSIZE; 913 a->reallocs++; 914 a->nz++; 915 } 916 917 N = nrow++ - 1; 918 /* shift up all the later entries in this row */ 919 for (ii=N; ii>=i; ii--) { 920 rp[ii+1] = rp[ii]; 921 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 922 } 923 if (N>=i) { 924 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 925 } 926 rp[i] = bcol; 927 ap[bs2*i + bs*cidx + ridx] = value; 928 noinsert1:; 929 low = i; 930 /* } */ 931 } 932 } /* end of if .. if.. */ 933 } /* end of loop over added columns */ 934 ailen[brow] = nrow; 935 } /* end of loop over added rows */ 936 937 PetscFunctionReturn(0); 938 } 939 940 EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*); 941 EXTERN PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*); 942 EXTERN PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS[],int); 943 EXTERN PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 944 EXTERN PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat*[]); 945 EXTERN PetscErrorCode MatScale_SeqSBAIJ(const PetscScalar*,Mat); 946 EXTERN PetscErrorCode MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 947 EXTERN PetscErrorCode MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 948 EXTERN PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat,Vec); 949 EXTERN PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 950 EXTERN PetscErrorCode MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 951 EXTERN PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat); 952 EXTERN PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat,Vec); 953 954 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 955 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 956 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 957 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 958 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 959 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 960 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 961 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 962 963 EXTERN PetscErrorCode MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs); 964 965 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 966 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 967 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 968 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 969 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 970 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 971 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 972 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 973 974 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*); 975 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*); 976 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*); 977 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*); 978 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*); 979 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*); 980 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*); 981 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*); 982 EXTERN PetscErrorCode MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*); 983 984 EXTERN PetscErrorCode MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 985 EXTERN PetscErrorCode MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 986 EXTERN PetscErrorCode MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 987 EXTERN PetscErrorCode MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 988 EXTERN PetscErrorCode MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 989 EXTERN PetscErrorCode MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 990 EXTERN PetscErrorCode MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 991 EXTERN PetscErrorCode MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 992 993 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 994 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 995 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 996 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 997 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 998 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 999 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 1000 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 1001 1002 #ifdef HAVE_MatICCFactor 1003 /* modified from MatILUFactor_SeqSBAIJ, needs further work! */ 1004 #undef __FUNCT__ 1005 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 1006 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 1007 { 1008 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 1009 Mat outA; 1010 PetscErrorCode ierr; 1011 PetscTruth row_identity,col_identity; 1012 1013 PetscFunctionBegin; 1014 outA = inA; 1015 inA->factor = FACTOR_CHOLESKY; 1016 1017 if (!a->diag) { 1018 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 1019 } 1020 /* 1021 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1022 for ILU(0) factorization with natural ordering 1023 */ 1024 switch (a->bs) { 1025 case 1: 1026 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1027 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1028 inA->ops->solves = MatSolves_SeqSBAIJ_1; 1029 PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1030 case 2: 1031 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1032 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1033 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1034 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1035 break; 1036 case 3: 1037 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1038 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1039 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1040 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1041 break; 1042 case 4: 1043 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1044 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1045 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1046 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1047 break; 1048 case 5: 1049 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1050 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1051 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1052 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1053 break; 1054 case 6: 1055 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1056 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1057 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1058 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1059 break; 1060 case 7: 1061 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1062 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1063 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1064 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1065 break; 1066 default: 1067 a->row = row; 1068 a->icol = col; 1069 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1070 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1071 1072 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1073 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1074 PetscLogObjectParent(inA,a->icol); 1075 1076 if (!a->solve_work) { 1077 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1078 PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar)); 1079 } 1080 } 1081 1082 ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr); 1083 1084 PetscFunctionReturn(0); 1085 } 1086 #endif 1087 1088 #undef __FUNCT__ 1089 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1090 PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat A) 1091 { 1092 static PetscTruth called = PETSC_FALSE; 1093 MPI_Comm comm = A->comm; 1094 PetscErrorCode ierr; 1095 1096 PetscFunctionBegin; 1097 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1098 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1099 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1100 PetscFunctionReturn(0); 1101 } 1102 1103 EXTERN_C_BEGIN 1104 #undef __FUNCT__ 1105 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1106 PetscErrorCode MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices) 1107 { 1108 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1109 int i,nz,n; 1110 1111 PetscFunctionBegin; 1112 nz = baij->maxnz; 1113 n = mat->n; 1114 for (i=0; i<nz; i++) { 1115 baij->j[i] = indices[i]; 1116 } 1117 baij->nz = nz; 1118 for (i=0; i<n; i++) { 1119 baij->ilen[i] = baij->imax[i]; 1120 } 1121 1122 PetscFunctionReturn(0); 1123 } 1124 EXTERN_C_END 1125 1126 #undef __FUNCT__ 1127 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1128 /*@ 1129 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1130 in the matrix. 1131 1132 Input Parameters: 1133 + mat - the SeqSBAIJ matrix 1134 - indices - the column indices 1135 1136 Level: advanced 1137 1138 Notes: 1139 This can be called if you have precomputed the nonzero structure of the 1140 matrix and want to provide it to the matrix object to improve the performance 1141 of the MatSetValues() operation. 1142 1143 You MUST have set the correct numbers of nonzeros per row in the call to 1144 MatCreateSeqSBAIJ(). 1145 1146 MUST be called before any calls to MatSetValues() 1147 1148 .seealso: MatCreateSeqSBAIJ 1149 @*/ 1150 PetscErrorCode MatSeqSBAIJSetColumnIndices(Mat mat,int *indices) 1151 { 1152 PetscErrorCode ierr,(*f)(Mat,int *); 1153 1154 PetscFunctionBegin; 1155 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1156 PetscValidPointer(indices,2); 1157 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1158 if (f) { 1159 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1160 } else { 1161 SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices"); 1162 } 1163 PetscFunctionReturn(0); 1164 } 1165 1166 #undef __FUNCT__ 1167 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1168 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A) 1169 { 1170 PetscErrorCode ierr; 1171 1172 PetscFunctionBegin; 1173 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1174 PetscFunctionReturn(0); 1175 } 1176 1177 #undef __FUNCT__ 1178 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1179 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1180 { 1181 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1182 PetscFunctionBegin; 1183 *array = a->a; 1184 PetscFunctionReturn(0); 1185 } 1186 1187 #undef __FUNCT__ 1188 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1189 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1190 { 1191 PetscFunctionBegin; 1192 PetscFunctionReturn(0); 1193 } 1194 1195 #include "petscblaslapack.h" 1196 #undef __FUNCT__ 1197 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1198 PetscErrorCode MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1199 { 1200 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1201 PetscErrorCode ierr; 1202 int i,bs=y->bs,bs2,j; 1203 PetscBLASInt bnz = (PetscBLASInt)x->nz,one = 1; 1204 1205 PetscFunctionBegin; 1206 if (str == SAME_NONZERO_PATTERN) { 1207 BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1208 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1209 if (y->xtoy && y->XtoY != X) { 1210 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1211 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1212 } 1213 if (!y->xtoy) { /* get xtoy */ 1214 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1215 y->XtoY = X; 1216 } 1217 bs2 = bs*bs; 1218 for (i=0; i<x->nz; i++) { 1219 j = 0; 1220 while (j < bs2){ 1221 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1222 j++; 1223 } 1224 } 1225 PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)); 1226 } else { 1227 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1228 } 1229 PetscFunctionReturn(0); 1230 } 1231 1232 #undef __FUNCT__ 1233 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1234 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1235 { 1236 PetscFunctionBegin; 1237 *flg = PETSC_TRUE; 1238 PetscFunctionReturn(0); 1239 } 1240 1241 #undef __FUNCT__ 1242 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1243 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1244 { 1245 PetscFunctionBegin; 1246 *flg = PETSC_TRUE; 1247 PetscFunctionReturn(0); 1248 } 1249 1250 #undef __FUNCT__ 1251 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1252 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg) 1253 { 1254 PetscFunctionBegin; 1255 *flg = PETSC_FALSE; 1256 PetscFunctionReturn(0); 1257 } 1258 1259 /* -------------------------------------------------------------------*/ 1260 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1261 MatGetRow_SeqSBAIJ, 1262 MatRestoreRow_SeqSBAIJ, 1263 MatMult_SeqSBAIJ_N, 1264 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1265 MatMult_SeqSBAIJ_N, 1266 MatMultAdd_SeqSBAIJ_N, 1267 MatSolve_SeqSBAIJ_N, 1268 0, 1269 0, 1270 /*10*/ 0, 1271 0, 1272 MatCholeskyFactor_SeqSBAIJ, 1273 MatRelax_SeqSBAIJ, 1274 MatTranspose_SeqSBAIJ, 1275 /*15*/ MatGetInfo_SeqSBAIJ, 1276 MatEqual_SeqSBAIJ, 1277 MatGetDiagonal_SeqSBAIJ, 1278 MatDiagonalScale_SeqSBAIJ, 1279 MatNorm_SeqSBAIJ, 1280 /*20*/ 0, 1281 MatAssemblyEnd_SeqSBAIJ, 1282 0, 1283 MatSetOption_SeqSBAIJ, 1284 MatZeroEntries_SeqSBAIJ, 1285 /*25*/ MatZeroRows_SeqSBAIJ, 1286 0, 1287 0, 1288 MatCholeskyFactorSymbolic_SeqSBAIJ, 1289 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1290 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1291 0, 1292 MatICCFactorSymbolic_SeqSBAIJ, 1293 MatGetArray_SeqSBAIJ, 1294 MatRestoreArray_SeqSBAIJ, 1295 /*35*/ MatDuplicate_SeqSBAIJ, 1296 0, 1297 0, 1298 0, 1299 0, 1300 /*40*/ MatAXPY_SeqSBAIJ, 1301 MatGetSubMatrices_SeqSBAIJ, 1302 MatIncreaseOverlap_SeqSBAIJ, 1303 MatGetValues_SeqSBAIJ, 1304 0, 1305 /*45*/ MatPrintHelp_SeqSBAIJ, 1306 MatScale_SeqSBAIJ, 1307 0, 1308 0, 1309 0, 1310 /*50*/ MatGetBlockSize_SeqSBAIJ, 1311 MatGetRowIJ_SeqSBAIJ, 1312 MatRestoreRowIJ_SeqSBAIJ, 1313 0, 1314 0, 1315 /*55*/ 0, 1316 0, 1317 0, 1318 0, 1319 MatSetValuesBlocked_SeqSBAIJ, 1320 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1321 0, 1322 0, 1323 MatGetPetscMaps_Petsc, 1324 0, 1325 /*65*/ 0, 1326 0, 1327 0, 1328 0, 1329 0, 1330 /*70*/ MatGetRowMax_SeqSBAIJ, 1331 0, 1332 0, 1333 0, 1334 0, 1335 /*75*/ 0, 1336 0, 1337 0, 1338 0, 1339 0, 1340 /*80*/ 0, 1341 0, 1342 0, 1343 #if !defined(PETSC_USE_COMPLEX) 1344 MatGetInertia_SeqSBAIJ, 1345 #else 1346 0, 1347 #endif 1348 MatLoad_SeqSBAIJ, 1349 /*85*/ MatIsSymmetric_SeqSBAIJ, 1350 MatIsHermitian_SeqSBAIJ, 1351 MatIsStructurallySymmetric_SeqSBAIJ, 1352 0, 1353 0, 1354 /*90*/ 0, 1355 0, 1356 0, 1357 0, 1358 0, 1359 /*95*/ 0, 1360 0, 1361 0, 1362 0}; 1363 1364 EXTERN_C_BEGIN 1365 #undef __FUNCT__ 1366 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1367 PetscErrorCode MatStoreValues_SeqSBAIJ(Mat mat) 1368 { 1369 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1370 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1371 PetscErrorCode ierr; 1372 1373 PetscFunctionBegin; 1374 if (aij->nonew != 1) { 1375 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1376 } 1377 1378 /* allocate space for values if not already there */ 1379 if (!aij->saved_values) { 1380 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1381 } 1382 1383 /* copy values over */ 1384 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1385 PetscFunctionReturn(0); 1386 } 1387 EXTERN_C_END 1388 1389 EXTERN_C_BEGIN 1390 #undef __FUNCT__ 1391 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1392 PetscErrorCode MatRetrieveValues_SeqSBAIJ(Mat mat) 1393 { 1394 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1395 PetscErrorCode ierr; 1396 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1397 1398 PetscFunctionBegin; 1399 if (aij->nonew != 1) { 1400 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1401 } 1402 if (!aij->saved_values) { 1403 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1404 } 1405 1406 /* copy values over */ 1407 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1408 PetscFunctionReturn(0); 1409 } 1410 EXTERN_C_END 1411 1412 EXTERN_C_BEGIN 1413 #undef __FUNCT__ 1414 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1415 PetscErrorCode MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,int bs,int nz,int *nnz) 1416 { 1417 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1418 PetscErrorCode ierr; 1419 int i,len,mbs,bs2; 1420 PetscTruth flg; 1421 1422 PetscFunctionBegin; 1423 B->preallocated = PETSC_TRUE; 1424 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1425 mbs = B->m/bs; 1426 bs2 = bs*bs; 1427 1428 if (mbs*bs != B->m) { 1429 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1430 } 1431 1432 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1433 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 1434 if (nnz) { 1435 for (i=0; i<mbs; i++) { 1436 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 1437 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs); 1438 } 1439 } 1440 1441 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1442 if (!flg) { 1443 switch (bs) { 1444 case 1: 1445 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1446 B->ops->solve = MatSolve_SeqSBAIJ_1; 1447 B->ops->solves = MatSolves_SeqSBAIJ_1; 1448 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1449 B->ops->mult = MatMult_SeqSBAIJ_1; 1450 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1451 break; 1452 case 2: 1453 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1454 B->ops->solve = MatSolve_SeqSBAIJ_2; 1455 B->ops->solvetranspose = MatSolve_SeqSBAIJ_2; 1456 B->ops->mult = MatMult_SeqSBAIJ_2; 1457 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1458 break; 1459 case 3: 1460 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1461 B->ops->solve = MatSolve_SeqSBAIJ_3; 1462 B->ops->solvetranspose = MatSolve_SeqSBAIJ_3; 1463 B->ops->mult = MatMult_SeqSBAIJ_3; 1464 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1465 break; 1466 case 4: 1467 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1468 B->ops->solve = MatSolve_SeqSBAIJ_4; 1469 B->ops->solvetranspose = MatSolve_SeqSBAIJ_4; 1470 B->ops->mult = MatMult_SeqSBAIJ_4; 1471 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1472 break; 1473 case 5: 1474 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1475 B->ops->solve = MatSolve_SeqSBAIJ_5; 1476 B->ops->solvetranspose = MatSolve_SeqSBAIJ_5; 1477 B->ops->mult = MatMult_SeqSBAIJ_5; 1478 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1479 break; 1480 case 6: 1481 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1482 B->ops->solve = MatSolve_SeqSBAIJ_6; 1483 B->ops->solvetranspose = MatSolve_SeqSBAIJ_6; 1484 B->ops->mult = MatMult_SeqSBAIJ_6; 1485 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1486 break; 1487 case 7: 1488 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1489 B->ops->solve = MatSolve_SeqSBAIJ_7; 1490 B->ops->solvetranspose = MatSolve_SeqSBAIJ_7; 1491 B->ops->mult = MatMult_SeqSBAIJ_7; 1492 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1493 break; 1494 } 1495 } 1496 1497 b->mbs = mbs; 1498 b->nbs = mbs; 1499 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1500 if (!nnz) { 1501 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1502 else if (nz <= 0) nz = 1; 1503 for (i=0; i<mbs; i++) { 1504 b->imax[i] = nz; 1505 } 1506 nz = nz*mbs; /* total nz */ 1507 } else { 1508 nz = 0; 1509 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1510 } 1511 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1512 1513 /* allocate the matrix space */ 1514 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1515 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1516 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1517 b->j = (int*)(b->a + nz*bs2); 1518 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 1519 b->i = b->j + nz; 1520 b->singlemalloc = PETSC_TRUE; 1521 1522 /* pointer to beginning of each row */ 1523 b->i[0] = 0; 1524 for (i=1; i<mbs+1; i++) { 1525 b->i[i] = b->i[i-1] + b->imax[i-1]; 1526 } 1527 1528 /* b->ilen will count nonzeros in each block row so far. */ 1529 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1530 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1531 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1532 1533 b->bs = bs; 1534 b->bs2 = bs2; 1535 b->nz = 0; 1536 b->maxnz = nz*bs2; 1537 1538 b->inew = 0; 1539 b->jnew = 0; 1540 b->anew = 0; 1541 b->a2anew = 0; 1542 b->permute = PETSC_FALSE; 1543 PetscFunctionReturn(0); 1544 } 1545 EXTERN_C_END 1546 1547 EXTERN_C_BEGIN 1548 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqAIJ(Mat,const MatType,Mat*); 1549 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqBAIJ(Mat,const MatType,Mat*); 1550 EXTERN_C_END 1551 1552 /*MC 1553 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1554 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1555 1556 Options Database Keys: 1557 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1558 1559 Level: beginner 1560 1561 .seealso: MatCreateSeqSBAIJ 1562 M*/ 1563 1564 EXTERN_C_BEGIN 1565 #undef __FUNCT__ 1566 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1567 PetscErrorCode MatCreate_SeqSBAIJ(Mat B) 1568 { 1569 Mat_SeqSBAIJ *b; 1570 PetscErrorCode ierr; 1571 int size; 1572 1573 PetscFunctionBegin; 1574 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1575 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1576 B->m = B->M = PetscMax(B->m,B->M); 1577 B->n = B->N = PetscMax(B->n,B->N); 1578 1579 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1580 B->data = (void*)b; 1581 ierr = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1582 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1583 B->ops->destroy = MatDestroy_SeqSBAIJ; 1584 B->ops->view = MatView_SeqSBAIJ; 1585 B->factor = 0; 1586 B->lupivotthreshold = 1.0; 1587 B->mapping = 0; 1588 b->row = 0; 1589 b->icol = 0; 1590 b->reallocs = 0; 1591 b->saved_values = 0; 1592 1593 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1594 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1595 1596 b->sorted = PETSC_FALSE; 1597 b->roworiented = PETSC_TRUE; 1598 b->nonew = 0; 1599 b->diag = 0; 1600 b->solve_work = 0; 1601 b->mult_work = 0; 1602 B->spptr = 0; 1603 b->keepzeroedrows = PETSC_FALSE; 1604 b->xtoy = 0; 1605 b->XtoY = 0; 1606 1607 b->inew = 0; 1608 b->jnew = 0; 1609 b->anew = 0; 1610 b->a2anew = 0; 1611 b->permute = PETSC_FALSE; 1612 1613 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1614 "MatStoreValues_SeqSBAIJ", 1615 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1616 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1617 "MatRetrieveValues_SeqSBAIJ", 1618 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1619 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1620 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1621 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1622 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1623 "MatConvert_SeqSBAIJ_SeqAIJ", 1624 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1625 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 1626 "MatConvert_SeqSBAIJ_SeqBAIJ", 1627 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 1628 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1629 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1630 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1631 1632 B->symmetric = PETSC_TRUE; 1633 B->structurally_symmetric = PETSC_TRUE; 1634 B->symmetric_set = PETSC_TRUE; 1635 B->structurally_symmetric_set = PETSC_TRUE; 1636 PetscFunctionReturn(0); 1637 } 1638 EXTERN_C_END 1639 1640 #undef __FUNCT__ 1641 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1642 /*@C 1643 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1644 compressed row) format. For good matrix assembly performance the 1645 user should preallocate the matrix storage by setting the parameter nz 1646 (or the array nnz). By setting these parameters accurately, performance 1647 during matrix assembly can be increased by more than a factor of 50. 1648 1649 Collective on Mat 1650 1651 Input Parameters: 1652 + A - the symmetric matrix 1653 . bs - size of block 1654 . nz - number of block nonzeros per block row (same for all rows) 1655 - nnz - array containing the number of block nonzeros in the upper triangular plus 1656 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1657 1658 Options Database Keys: 1659 . -mat_no_unroll - uses code that does not unroll the loops in the 1660 block calculations (much slower) 1661 . -mat_block_size - size of the blocks to use 1662 1663 Level: intermediate 1664 1665 Notes: 1666 Specify the preallocated storage with either nz or nnz (not both). 1667 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1668 allocation. For additional details, see the users manual chapter on 1669 matrices. 1670 1671 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1672 @*/ 1673 PetscErrorCode MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) { 1674 PetscErrorCode ierr,(*f)(Mat,int,int,const int[]); 1675 1676 PetscFunctionBegin; 1677 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1678 if (f) { 1679 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1680 } 1681 PetscFunctionReturn(0); 1682 } 1683 1684 #undef __FUNCT__ 1685 #define __FUNCT__ "MatCreateSeqSBAIJ" 1686 /*@C 1687 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1688 compressed row) format. For good matrix assembly performance the 1689 user should preallocate the matrix storage by setting the parameter nz 1690 (or the array nnz). By setting these parameters accurately, performance 1691 during matrix assembly can be increased by more than a factor of 50. 1692 1693 Collective on MPI_Comm 1694 1695 Input Parameters: 1696 + comm - MPI communicator, set to PETSC_COMM_SELF 1697 . bs - size of block 1698 . m - number of rows, or number of columns 1699 . nz - number of block nonzeros per block row (same for all rows) 1700 - nnz - array containing the number of block nonzeros in the upper triangular plus 1701 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1702 1703 Output Parameter: 1704 . A - the symmetric matrix 1705 1706 Options Database Keys: 1707 . -mat_no_unroll - uses code that does not unroll the loops in the 1708 block calculations (much slower) 1709 . -mat_block_size - size of the blocks to use 1710 1711 Level: intermediate 1712 1713 Notes: 1714 1715 Specify the preallocated storage with either nz or nnz (not both). 1716 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1717 allocation. For additional details, see the users manual chapter on 1718 matrices. 1719 1720 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1721 @*/ 1722 PetscErrorCode MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A) 1723 { 1724 PetscErrorCode ierr; 1725 1726 PetscFunctionBegin; 1727 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1728 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1729 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1730 PetscFunctionReturn(0); 1731 } 1732 1733 #undef __FUNCT__ 1734 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1735 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1736 { 1737 Mat C; 1738 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1739 PetscErrorCode ierr; 1740 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1741 1742 PetscFunctionBegin; 1743 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1744 1745 *B = 0; 1746 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1747 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1748 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1749 c = (Mat_SeqSBAIJ*)C->data; 1750 1751 C->preallocated = PETSC_TRUE; 1752 C->factor = A->factor; 1753 c->row = 0; 1754 c->icol = 0; 1755 c->saved_values = 0; 1756 c->keepzeroedrows = a->keepzeroedrows; 1757 C->assembled = PETSC_TRUE; 1758 1759 C->M = A->M; 1760 C->N = A->N; 1761 c->bs = a->bs; 1762 c->bs2 = a->bs2; 1763 c->mbs = a->mbs; 1764 c->nbs = a->nbs; 1765 1766 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1767 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1768 for (i=0; i<mbs; i++) { 1769 c->imax[i] = a->imax[i]; 1770 c->ilen[i] = a->ilen[i]; 1771 } 1772 1773 /* allocate the matrix space */ 1774 c->singlemalloc = PETSC_TRUE; 1775 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1776 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1777 c->j = (int*)(c->a + nz*bs2); 1778 c->i = c->j + nz; 1779 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1780 if (mbs > 0) { 1781 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1782 if (cpvalues == MAT_COPY_VALUES) { 1783 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1784 } else { 1785 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1786 } 1787 } 1788 1789 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1790 c->sorted = a->sorted; 1791 c->roworiented = a->roworiented; 1792 c->nonew = a->nonew; 1793 1794 if (a->diag) { 1795 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1796 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1797 for (i=0; i<mbs; i++) { 1798 c->diag[i] = a->diag[i]; 1799 } 1800 } else c->diag = 0; 1801 c->nz = a->nz; 1802 c->maxnz = a->maxnz; 1803 c->solve_work = 0; 1804 c->mult_work = 0; 1805 *B = C; 1806 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1807 PetscFunctionReturn(0); 1808 } 1809 1810 #undef __FUNCT__ 1811 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1812 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer,const MatType type,Mat *A) 1813 { 1814 Mat_SeqSBAIJ *a; 1815 Mat B; 1816 PetscErrorCode ierr; 1817 int i,nz,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1818 int *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1819 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1820 int *masked,nmask,tmp,bs2,ishift; 1821 PetscScalar *aa; 1822 MPI_Comm comm = ((PetscObject)viewer)->comm; 1823 1824 PetscFunctionBegin; 1825 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1826 bs2 = bs*bs; 1827 1828 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1829 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1830 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1831 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1832 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1833 M = header[1]; N = header[2]; nz = header[3]; 1834 1835 if (header[3] < 0) { 1836 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1837 } 1838 1839 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1840 1841 /* 1842 This code adds extra rows to make sure the number of rows is 1843 divisible by the blocksize 1844 */ 1845 mbs = M/bs; 1846 extra_rows = bs - M + bs*(mbs); 1847 if (extra_rows == bs) extra_rows = 0; 1848 else mbs++; 1849 if (extra_rows) { 1850 PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"); 1851 } 1852 1853 /* read in row lengths */ 1854 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1855 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1856 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1857 1858 /* read in column indices */ 1859 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1860 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1861 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1862 1863 /* loop over row lengths determining block row lengths */ 1864 ierr = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr); 1865 ierr = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1866 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1867 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1868 masked = mask + mbs; 1869 rowcount = 0; nzcount = 0; 1870 for (i=0; i<mbs; i++) { 1871 nmask = 0; 1872 for (j=0; j<bs; j++) { 1873 kmax = rowlengths[rowcount]; 1874 for (k=0; k<kmax; k++) { 1875 tmp = jj[nzcount++]/bs; /* block col. index */ 1876 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1877 } 1878 rowcount++; 1879 } 1880 s_browlengths[i] += nmask; 1881 1882 /* zero out the mask elements we set */ 1883 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1884 } 1885 1886 /* create our matrix */ 1887 ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr); 1888 ierr = MatSetType(B,type);CHKERRQ(ierr); 1889 ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr); 1890 a = (Mat_SeqSBAIJ*)B->data; 1891 1892 /* set matrix "i" values */ 1893 a->i[0] = 0; 1894 for (i=1; i<= mbs; i++) { 1895 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1896 a->ilen[i-1] = s_browlengths[i-1]; 1897 } 1898 a->nz = a->i[mbs]; 1899 1900 /* read in nonzero values */ 1901 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1902 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1903 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1904 1905 /* set "a" and "j" values into matrix */ 1906 nzcount = 0; jcount = 0; 1907 for (i=0; i<mbs; i++) { 1908 nzcountb = nzcount; 1909 nmask = 0; 1910 for (j=0; j<bs; j++) { 1911 kmax = rowlengths[i*bs+j]; 1912 for (k=0; k<kmax; k++) { 1913 tmp = jj[nzcount++]/bs; /* block col. index */ 1914 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1915 } 1916 } 1917 /* sort the masked values */ 1918 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1919 1920 /* set "j" values into matrix */ 1921 maskcount = 1; 1922 for (j=0; j<nmask; j++) { 1923 a->j[jcount++] = masked[j]; 1924 mask[masked[j]] = maskcount++; 1925 } 1926 1927 /* set "a" values into matrix */ 1928 ishift = bs2*a->i[i]; 1929 for (j=0; j<bs; j++) { 1930 kmax = rowlengths[i*bs+j]; 1931 for (k=0; k<kmax; k++) { 1932 tmp = jj[nzcountb]/bs ; /* block col. index */ 1933 if (tmp >= i){ 1934 block = mask[tmp] - 1; 1935 point = jj[nzcountb] - bs*tmp; 1936 idx = ishift + bs2*block + j + bs*point; 1937 a->a[idx] = aa[nzcountb]; 1938 } 1939 nzcountb++; 1940 } 1941 } 1942 /* zero out the mask elements we set */ 1943 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1944 } 1945 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1946 1947 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1948 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1949 ierr = PetscFree(aa);CHKERRQ(ierr); 1950 ierr = PetscFree(jj);CHKERRQ(ierr); 1951 ierr = PetscFree(mask);CHKERRQ(ierr); 1952 1953 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1954 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1955 ierr = MatView_Private(B);CHKERRQ(ierr); 1956 *A = B; 1957 PetscFunctionReturn(0); 1958 } 1959 1960 #undef __FUNCT__ 1961 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1962 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1963 { 1964 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1965 MatScalar *aa=a->a,*v,*v1; 1966 PetscScalar *x,*b,*t,sum,d; 1967 PetscErrorCode ierr; 1968 int m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j; 1969 int nz,nz1,*vj,*vj1,i; 1970 1971 PetscFunctionBegin; 1972 its = its*lits; 1973 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1974 1975 if (bs > 1) 1976 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1977 1978 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1979 if (xx != bb) { 1980 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1981 } else { 1982 b = x; 1983 } 1984 1985 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1986 1987 if (flag & SOR_ZERO_INITIAL_GUESS) { 1988 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1989 for (i=0; i<m; i++) 1990 t[i] = b[i]; 1991 1992 for (i=0; i<m; i++){ 1993 d = *(aa + ai[i]); /* diag[i] */ 1994 v = aa + ai[i] + 1; 1995 vj = aj + ai[i] + 1; 1996 nz = ai[i+1] - ai[i] - 1; 1997 x[i] = omega*t[i]/d; 1998 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 1999 PetscLogFlops(2*nz-1); 2000 } 2001 } 2002 2003 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2004 for (i=0; i<m; i++) 2005 t[i] = b[i]; 2006 2007 for (i=0; i<m-1; i++){ /* update rhs */ 2008 v = aa + ai[i] + 1; 2009 vj = aj + ai[i] + 1; 2010 nz = ai[i+1] - ai[i] - 1; 2011 while (nz--) t[*vj++] -= x[i]*(*v++); 2012 PetscLogFlops(2*nz-1); 2013 } 2014 for (i=m-1; i>=0; i--){ 2015 d = *(aa + ai[i]); 2016 v = aa + ai[i] + 1; 2017 vj = aj + ai[i] + 1; 2018 nz = ai[i+1] - ai[i] - 1; 2019 sum = t[i]; 2020 while (nz--) sum -= x[*vj++]*(*v++); 2021 PetscLogFlops(2*nz-1); 2022 x[i] = (1-omega)*x[i] + omega*sum/d; 2023 } 2024 } 2025 its--; 2026 } 2027 2028 while (its--) { 2029 /* 2030 forward sweep: 2031 for i=0,...,m-1: 2032 sum[i] = (b[i] - U(i,:)x )/d[i]; 2033 x[i] = (1-omega)x[i] + omega*sum[i]; 2034 b = b - x[i]*U^T(i,:); 2035 2036 */ 2037 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2038 for (i=0; i<m; i++) 2039 t[i] = b[i]; 2040 2041 for (i=0; i<m; i++){ 2042 d = *(aa + ai[i]); /* diag[i] */ 2043 v = aa + ai[i] + 1; v1=v; 2044 vj = aj + ai[i] + 1; vj1=vj; 2045 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2046 sum = t[i]; 2047 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2048 x[i] = (1-omega)*x[i] + omega*sum/d; 2049 while (nz--) t[*vj++] -= x[i]*(*v++); 2050 PetscLogFlops(4*nz-2); 2051 } 2052 } 2053 2054 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2055 /* 2056 backward sweep: 2057 b = b - x[i]*U^T(i,:), i=0,...,n-2 2058 for i=m-1,...,0: 2059 sum[i] = (b[i] - U(i,:)x )/d[i]; 2060 x[i] = (1-omega)x[i] + omega*sum[i]; 2061 */ 2062 for (i=0; i<m; i++) 2063 t[i] = b[i]; 2064 2065 for (i=0; i<m-1; i++){ /* update rhs */ 2066 v = aa + ai[i] + 1; 2067 vj = aj + ai[i] + 1; 2068 nz = ai[i+1] - ai[i] - 1; 2069 while (nz--) t[*vj++] -= x[i]*(*v++); 2070 PetscLogFlops(2*nz-1); 2071 } 2072 for (i=m-1; i>=0; i--){ 2073 d = *(aa + ai[i]); 2074 v = aa + ai[i] + 1; 2075 vj = aj + ai[i] + 1; 2076 nz = ai[i+1] - ai[i] - 1; 2077 sum = t[i]; 2078 while (nz--) sum -= x[*vj++]*(*v++); 2079 PetscLogFlops(2*nz-1); 2080 x[i] = (1-omega)*x[i] + omega*sum/d; 2081 } 2082 } 2083 } 2084 2085 ierr = PetscFree(t);CHKERRQ(ierr); 2086 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2087 if (bb != xx) { 2088 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2089 } 2090 2091 PetscFunctionReturn(0); 2092 } 2093 2094 2095 2096 2097 2098 2099