1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/ 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "src/mat/impls/sbaij/seq/sbaij.h" 11 12 #define CHUNKSIZE 10 13 14 /* 15 Checks for missing diagonals 16 */ 17 #undef __FUNCT__ 18 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 19 int MatMissingDiagonal_SeqSBAIJ(Mat A) 20 { 21 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 22 int *diag,*jj = a->j,i,ierr; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for (i=0; i<a->mbs; i++) { 28 if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i); 29 } 30 PetscFunctionReturn(0); 31 } 32 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 35 int MatMarkDiagonal_SeqSBAIJ(Mat A) 36 { 37 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 38 int i,mbs = a->mbs,ierr; 39 40 PetscFunctionBegin; 41 if (a->diag) PetscFunctionReturn(0); 42 43 ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr); 44 PetscLogObjectMemory(A,(mbs+1)*sizeof(int)); 45 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 /* extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); */ 50 51 #undef __FUNCT__ 52 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 53 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 54 { 55 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 56 57 PetscFunctionBegin; 58 59 if (ia) { 60 SETERRQ(1,"Function not yet written for SBAIJ format, only supports natural ordering"); 61 } 62 63 #ifdef NEW 64 int ierr; 65 66 if (!ia) PetscFunctionReturn(0); 67 68 /* 69 if (symmetric) { 70 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 71 } else if (oshift == 1) { 72 int nz = a->i[n]; 73 for (i=0; i<nz; i++) a->j[i]++; 74 for (i=0; i<n+1; i++) a->i[i]++; 75 *ia = a->i; *ja = a->j; 76 } else { 77 */ 78 *ia = a->i; *ja = a->j; 79 /* } */ 80 #endif 81 *nn = a->mbs; 82 PetscFunctionReturn(0); 83 } 84 85 #undef __FUNCT__ 86 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 87 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 88 { 89 PetscFunctionBegin; 90 if (!ia) PetscFunctionReturn(0); 91 SETERRQ(1,"Function not yet written for SBAIJ format, only supports natural ordering"); 92 /* PetscFunctionReturn(0); */ 93 } 94 95 #undef __FUNCT__ 96 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ" 97 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs) 98 { 99 Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data; 100 101 PetscFunctionBegin; 102 *bs = sbaij->bs; 103 PetscFunctionReturn(0); 104 } 105 106 #undef __FUNCT__ 107 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 108 int MatDestroy_SeqSBAIJ(Mat A) 109 { 110 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 111 int ierr; 112 113 PetscFunctionBegin; 114 #if defined(PETSC_USE_LOG) 115 PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz); 116 #endif 117 ierr = PetscFree(a->a);CHKERRQ(ierr); 118 if (!a->singlemalloc) { 119 ierr = PetscFree(a->i);CHKERRQ(ierr); 120 ierr = PetscFree(a->j);CHKERRQ(ierr); 121 } 122 if (a->row) { 123 ierr = ISDestroy(a->row);CHKERRQ(ierr); 124 } 125 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 126 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 127 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 128 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 129 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 130 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 131 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 132 133 if (a->inew){ 134 ierr = PetscFree(a->inew);CHKERRQ(ierr); 135 a->inew = 0; 136 } 137 ierr = PetscFree(a);CHKERRQ(ierr); 138 PetscFunctionReturn(0); 139 } 140 141 #undef __FUNCT__ 142 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 143 int MatSetOption_SeqSBAIJ(Mat A,MatOption op) 144 { 145 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 146 147 PetscFunctionBegin; 148 switch (op) { 149 case MAT_ROW_ORIENTED: 150 a->roworiented = PETSC_TRUE; 151 break; 152 case MAT_COLUMN_ORIENTED: 153 a->roworiented = PETSC_FALSE; 154 break; 155 case MAT_COLUMNS_SORTED: 156 a->sorted = PETSC_TRUE; 157 break; 158 case MAT_COLUMNS_UNSORTED: 159 a->sorted = PETSC_FALSE; 160 break; 161 case MAT_KEEP_ZEROED_ROWS: 162 a->keepzeroedrows = PETSC_TRUE; 163 break; 164 case MAT_NO_NEW_NONZERO_LOCATIONS: 165 a->nonew = 1; 166 break; 167 case MAT_NEW_NONZERO_LOCATION_ERR: 168 a->nonew = -1; 169 break; 170 case MAT_NEW_NONZERO_ALLOCATION_ERR: 171 a->nonew = -2; 172 break; 173 case MAT_YES_NEW_NONZERO_LOCATIONS: 174 a->nonew = 0; 175 break; 176 case MAT_ROWS_SORTED: 177 case MAT_ROWS_UNSORTED: 178 case MAT_YES_NEW_DIAGONALS: 179 case MAT_IGNORE_OFF_PROC_ENTRIES: 180 case MAT_USE_HASH_TABLE: 181 case MAT_USE_SINGLE_PRECISION_SOLVES: 182 PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n"); 183 break; 184 case MAT_NO_NEW_DIAGONALS: 185 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 186 default: 187 SETERRQ(PETSC_ERR_SUP,"unknown option"); 188 } 189 PetscFunctionReturn(0); 190 } 191 192 #undef __FUNCT__ 193 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 194 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v) 195 { 196 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 197 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr; 198 MatScalar *aa,*aa_i; 199 PetscScalar *v_i; 200 201 PetscFunctionBegin; 202 bs = a->bs; 203 ai = a->i; 204 aj = a->j; 205 aa = a->a; 206 bs2 = a->bs2; 207 208 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 209 210 bn = row/bs; /* Block number */ 211 bp = row % bs; /* Block position */ 212 M = ai[bn+1] - ai[bn]; 213 *ncols = bs*M; 214 215 if (v) { 216 *v = 0; 217 if (*ncols) { 218 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 219 for (i=0; i<M; i++) { /* for each block in the block row */ 220 v_i = *v + i*bs; 221 aa_i = aa + bs2*(ai[bn] + i); 222 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 223 } 224 } 225 } 226 227 if (cols) { 228 *cols = 0; 229 if (*ncols) { 230 ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr); 231 for (i=0; i<M; i++) { /* for each block in the block row */ 232 cols_i = *cols + i*bs; 233 itmp = bs*aj[ai[bn] + i]; 234 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 235 } 236 } 237 } 238 239 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 240 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 241 #ifdef column_search 242 v_i = *v + M*bs; 243 cols_i = *cols + M*bs; 244 for (i=0; i<bn; i++){ /* for each block row */ 245 M = ai[i+1] - ai[i]; 246 for (j=0; j<M; j++){ 247 itmp = aj[ai[i] + j]; /* block column value */ 248 if (itmp == bn){ 249 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 250 for (k=0; k<bs; k++) { 251 *cols_i++ = i*bs+k; 252 *v_i++ = aa_i[k]; 253 } 254 *ncols += bs; 255 break; 256 } 257 } 258 } 259 #endif 260 261 PetscFunctionReturn(0); 262 } 263 264 #undef __FUNCT__ 265 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 266 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 267 { 268 int ierr; 269 270 PetscFunctionBegin; 271 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 272 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 273 PetscFunctionReturn(0); 274 } 275 276 #undef __FUNCT__ 277 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 278 int MatTranspose_SeqSBAIJ(Mat A,Mat *B) 279 { 280 int ierr; 281 PetscFunctionBegin; 282 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 283 PetscFunctionReturn(0); 284 } 285 286 #undef __FUNCT__ 287 #define __FUNCT__ "MatView_SeqSBAIJ_Binary" 288 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer) 289 { 290 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 291 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 292 PetscScalar *aa; 293 FILE *file; 294 295 PetscFunctionBegin; 296 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 297 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 298 col_lens[0] = MAT_FILE_COOKIE; 299 300 col_lens[1] = A->m; 301 col_lens[2] = A->m; 302 col_lens[3] = a->s_nz*bs2; 303 304 /* store lengths of each row and write (including header) to file */ 305 count = 0; 306 for (i=0; i<a->mbs; i++) { 307 for (j=0; j<bs; j++) { 308 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 309 } 310 } 311 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 312 ierr = PetscFree(col_lens);CHKERRQ(ierr); 313 314 /* store column indices (zero start index) */ 315 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 316 count = 0; 317 for (i=0; i<a->mbs; i++) { 318 for (j=0; j<bs; j++) { 319 for (k=a->i[i]; k<a->i[i+1]; k++) { 320 for (l=0; l<bs; l++) { 321 jj[count++] = bs*a->j[k] + l; 322 } 323 } 324 } 325 } 326 ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr); 327 ierr = PetscFree(jj);CHKERRQ(ierr); 328 329 /* store nonzero values */ 330 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 331 count = 0; 332 for (i=0; i<a->mbs; i++) { 333 for (j=0; j<bs; j++) { 334 for (k=a->i[i]; k<a->i[i+1]; k++) { 335 for (l=0; l<bs; l++) { 336 aa[count++] = a->a[bs2*k + l*bs + j]; 337 } 338 } 339 } 340 } 341 ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr); 342 ierr = PetscFree(aa);CHKERRQ(ierr); 343 344 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 345 if (file) { 346 fprintf(file,"-matload_block_size %d\n",a->bs); 347 } 348 PetscFunctionReturn(0); 349 } 350 351 #undef __FUNCT__ 352 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 353 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 354 { 355 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 356 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 357 char *name; 358 PetscViewerFormat format; 359 360 PetscFunctionBegin; 361 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 362 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 363 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 364 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 365 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 366 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 367 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 368 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 369 for (i=0; i<a->mbs; i++) { 370 for (j=0; j<bs; j++) { 371 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 372 for (k=a->i[i]; k<a->i[i+1]; k++) { 373 for (l=0; l<bs; l++) { 374 #if defined(PETSC_USE_COMPLEX) 375 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 376 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 377 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 378 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 379 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 380 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 381 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 382 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 383 } 384 #else 385 if (a->a[bs2*k + l*bs + j] != 0.0) { 386 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 387 } 388 #endif 389 } 390 } 391 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 392 } 393 } 394 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 395 } else { 396 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 397 for (i=0; i<a->mbs; i++) { 398 for (j=0; j<bs; j++) { 399 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 400 for (k=a->i[i]; k<a->i[i+1]; k++) { 401 for (l=0; l<bs; l++) { 402 #if defined(PETSC_USE_COMPLEX) 403 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 404 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 405 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 406 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 407 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 408 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 409 } else { 410 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 411 } 412 #else 413 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 414 #endif 415 } 416 } 417 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 418 } 419 } 420 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 421 } 422 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 423 PetscFunctionReturn(0); 424 } 425 426 #undef __FUNCT__ 427 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 428 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 429 { 430 Mat A = (Mat) Aa; 431 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 432 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 433 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 434 MatScalar *aa; 435 MPI_Comm comm; 436 PetscViewer viewer; 437 438 PetscFunctionBegin; 439 /* 440 This is nasty. If this is called from an originally parallel matrix 441 then all processes call this,but only the first has the matrix so the 442 rest should return immediately. 443 */ 444 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 445 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 446 if (rank) PetscFunctionReturn(0); 447 448 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 449 450 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 451 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 452 453 /* loop over matrix elements drawing boxes */ 454 color = PETSC_DRAW_BLUE; 455 for (i=0,row=0; i<mbs; i++,row+=bs) { 456 for (j=a->i[i]; j<a->i[i+1]; j++) { 457 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 458 x_l = a->j[j]*bs; x_r = x_l + 1.0; 459 aa = a->a + j*bs2; 460 for (k=0; k<bs; k++) { 461 for (l=0; l<bs; l++) { 462 if (PetscRealPart(*aa++) >= 0.) continue; 463 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 464 } 465 } 466 } 467 } 468 color = PETSC_DRAW_CYAN; 469 for (i=0,row=0; i<mbs; i++,row+=bs) { 470 for (j=a->i[i]; j<a->i[i+1]; j++) { 471 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 472 x_l = a->j[j]*bs; x_r = x_l + 1.0; 473 aa = a->a + j*bs2; 474 for (k=0; k<bs; k++) { 475 for (l=0; l<bs; l++) { 476 if (PetscRealPart(*aa++) != 0.) continue; 477 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 478 } 479 } 480 } 481 } 482 483 color = PETSC_DRAW_RED; 484 for (i=0,row=0; i<mbs; i++,row+=bs) { 485 for (j=a->i[i]; j<a->i[i+1]; j++) { 486 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 487 x_l = a->j[j]*bs; x_r = x_l + 1.0; 488 aa = a->a + j*bs2; 489 for (k=0; k<bs; k++) { 490 for (l=0; l<bs; l++) { 491 if (PetscRealPart(*aa++) <= 0.) continue; 492 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 493 } 494 } 495 } 496 } 497 PetscFunctionReturn(0); 498 } 499 500 #undef __FUNCT__ 501 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 502 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 503 { 504 int ierr; 505 PetscReal xl,yl,xr,yr,w,h; 506 PetscDraw draw; 507 PetscTruth isnull; 508 509 PetscFunctionBegin; 510 511 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 512 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 513 514 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 515 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 516 xr += w; yr += h; xl = -w; yl = -h; 517 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 518 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 519 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 520 PetscFunctionReturn(0); 521 } 522 523 #undef __FUNCT__ 524 #define __FUNCT__ "MatView_SeqSBAIJ" 525 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 526 { 527 int ierr; 528 PetscTruth issocket,isascii,isbinary,isdraw; 529 530 PetscFunctionBegin; 531 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 532 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 533 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 534 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 535 if (issocket) { 536 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 537 } else if (isascii){ 538 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 539 } else if (isbinary) { 540 ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr); 541 } else if (isdraw) { 542 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 543 } else { 544 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 545 } 546 PetscFunctionReturn(0); 547 } 548 549 550 #undef __FUNCT__ 551 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 552 int MatGetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v) 553 { 554 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 555 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 556 int *ai = a->i,*ailen = a->ilen; 557 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 558 MatScalar *ap,*aa = a->a,zero = 0.0; 559 560 PetscFunctionBegin; 561 for (k=0; k<m; k++) { /* loop over rows */ 562 row = im[k]; brow = row/bs; 563 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 564 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 565 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 566 nrow = ailen[brow]; 567 for (l=0; l<n; l++) { /* loop over columns */ 568 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 569 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 570 col = in[l] ; 571 bcol = col/bs; 572 cidx = col%bs; 573 ridx = row%bs; 574 high = nrow; 575 low = 0; /* assume unsorted */ 576 while (high-low > 5) { 577 t = (low+high)/2; 578 if (rp[t] > bcol) high = t; 579 else low = t; 580 } 581 for (i=low; i<high; i++) { 582 if (rp[i] > bcol) break; 583 if (rp[i] == bcol) { 584 *v++ = ap[bs2*i+bs*cidx+ridx]; 585 goto finished; 586 } 587 } 588 *v++ = zero; 589 finished:; 590 } 591 } 592 PetscFunctionReturn(0); 593 } 594 595 596 #undef __FUNCT__ 597 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 598 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 599 { 600 PetscFunctionBegin; 601 SETERRQ(1,"Function not yet written for SBAIJ format"); 602 } 603 604 #undef __FUNCT__ 605 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 606 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 607 { 608 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 609 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 610 int m = A->m,*ip,N,*ailen = a->ilen; 611 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 612 MatScalar *aa = a->a,*ap; 613 #if defined(PETSC_HAVE_SPOOLES) 614 PetscTruth flag; 615 #endif 616 617 PetscFunctionBegin; 618 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 619 620 if (m) rmax = ailen[0]; 621 for (i=1; i<mbs; i++) { 622 /* move each row back by the amount of empty slots (fshift) before it*/ 623 fshift += imax[i-1] - ailen[i-1]; 624 rmax = PetscMax(rmax,ailen[i]); 625 if (fshift) { 626 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 627 N = ailen[i]; 628 for (j=0; j<N; j++) { 629 ip[j-fshift] = ip[j]; 630 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 631 } 632 } 633 ai[i] = ai[i-1] + ailen[i-1]; 634 } 635 if (mbs) { 636 fshift += imax[mbs-1] - ailen[mbs-1]; 637 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 638 } 639 /* reset ilen and imax for each row */ 640 for (i=0; i<mbs; i++) { 641 ailen[i] = imax[i] = ai[i+1] - ai[i]; 642 } 643 a->s_nz = ai[mbs]; 644 645 /* diagonals may have moved, reset it */ 646 if (a->diag) { 647 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr); 648 } 649 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 650 m,A->m,a->bs,fshift*bs2,a->s_nz*bs2); 651 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n", 652 a->reallocs); 653 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 654 a->reallocs = 0; 655 A->info.nz_unneeded = (PetscReal)fshift*bs2; 656 657 #if defined(PETSC_HAVE_SPOOLES) 658 ierr = PetscOptionsHasName(A->prefix,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr); 659 if (flag) { ierr = MatUseSpooles_SeqSBAIJ(A);CHKERRQ(ierr); } 660 #endif 661 662 PetscFunctionReturn(0); 663 } 664 665 /* 666 This function returns an array of flags which indicate the locations of contiguous 667 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 668 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 669 Assume: sizes should be long enough to hold all the values. 670 */ 671 #undef __FUNCT__ 672 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 673 static int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 674 { 675 int i,j,k,row; 676 PetscTruth flg; 677 678 PetscFunctionBegin; 679 for (i=0,j=0; i<n; j++) { 680 row = idx[i]; 681 if (row%bs!=0) { /* Not the begining of a block */ 682 sizes[j] = 1; 683 i++; 684 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 685 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 686 i++; 687 } else { /* Begining of the block, so check if the complete block exists */ 688 flg = PETSC_TRUE; 689 for (k=1; k<bs; k++) { 690 if (row+k != idx[i+k]) { /* break in the block */ 691 flg = PETSC_FALSE; 692 break; 693 } 694 } 695 if (flg == PETSC_TRUE) { /* No break in the bs */ 696 sizes[j] = bs; 697 i+= bs; 698 } else { 699 sizes[j] = 1; 700 i++; 701 } 702 } 703 } 704 *bs_max = j; 705 PetscFunctionReturn(0); 706 } 707 708 #undef __FUNCT__ 709 #define __FUNCT__ "MatZeroRows_SeqSBAIJ" 710 int MatZeroRows_SeqSBAIJ(Mat A,IS is,PetscScalar *diag) 711 { 712 Mat_SeqSBAIJ *sbaij=(Mat_SeqSBAIJ*)A->data; 713 int ierr,i,j,k,count,is_n,*is_idx,*rows; 714 int bs=sbaij->bs,bs2=sbaij->bs2,*sizes,row,bs_max; 715 PetscScalar zero = 0.0; 716 MatScalar *aa; 717 718 PetscFunctionBegin; 719 /* Make a copy of the IS and sort it */ 720 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 721 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 722 723 /* allocate memory for rows,sizes */ 724 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 725 sizes = rows + is_n; 726 727 /* initialize copy IS values to rows, and sort them */ 728 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 729 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 730 if (sbaij->keepzeroedrows) { /* do not change nonzero structure */ 731 for (i=0; i<is_n; i++) { sizes[i] = 1; } /* sizes: size of blocks, = 1 or bs */ 732 bs_max = is_n; /* bs_max: num. of contiguous block row in the row */ 733 } else { 734 ierr = MatZeroRows_SeqSBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 735 } 736 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 737 738 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 739 row = rows[j]; /* row to be zeroed */ 740 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 741 count = (sbaij->i[row/bs +1] - sbaij->i[row/bs])*bs; /* num. of elements in the row */ 742 aa = sbaij->a + sbaij->i[row/bs]*bs2 + (row%bs); 743 if (sizes[i] == bs && !sbaij->keepzeroedrows) { 744 if (diag) { 745 if (sbaij->ilen[row/bs] > 0) { 746 sbaij->ilen[row/bs] = 1; 747 sbaij->j[sbaij->i[row/bs]] = row/bs; 748 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 749 } 750 /* Now insert all the diagoanl values for this bs */ 751 for (k=0; k<bs; k++) { 752 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 753 } 754 } else { /* (!diag) */ 755 sbaij->ilen[row/bs] = 0; 756 } /* end (!diag) */ 757 } else { /* (sizes[i] != bs), broken block */ 758 #if defined (PETSC_USE_DEBUG) 759 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 760 #endif 761 for (k=0; k<count; k++) { 762 aa[0] = zero; 763 aa+=bs; 764 } 765 if (diag) { 766 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 767 } 768 } 769 } 770 771 ierr = PetscFree(rows);CHKERRQ(ierr); 772 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 773 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 774 PetscFunctionReturn(0); 775 } 776 777 /* Only add/insert a(i,j) with i<=j (blocks). 778 Any a(i,j) with i>j input by user is ingored. 779 */ 780 781 #undef __FUNCT__ 782 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 783 int MatSetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 784 { 785 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 786 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 787 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 788 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 789 int ridx,cidx,bs2=a->bs2,ierr; 790 MatScalar *ap,value,*aa=a->a,*bap; 791 792 PetscFunctionBegin; 793 794 for (k=0; k<m; k++) { /* loop over added rows */ 795 row = im[k]; /* row number */ 796 brow = row/bs; /* block row number */ 797 if (row < 0) continue; 798 #if defined(PETSC_USE_BOPT_g) 799 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 800 #endif 801 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 802 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 803 rmax = imax[brow]; /* maximum space allocated for this row */ 804 nrow = ailen[brow]; /* actual length of this row */ 805 low = 0; 806 807 for (l=0; l<n; l++) { /* loop over added columns */ 808 if (in[l] < 0) continue; 809 #if defined(PETSC_USE_BOPT_g) 810 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m); 811 #endif 812 col = in[l]; 813 bcol = col/bs; /* block col number */ 814 815 if (brow <= bcol){ 816 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 817 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 818 /* element value a(k,l) */ 819 if (roworiented) { 820 value = v[l + k*n]; 821 } else { 822 value = v[k + l*m]; 823 } 824 825 /* move pointer bap to a(k,l) quickly and add/insert value */ 826 if (!sorted) low = 0; high = nrow; 827 while (high-low > 7) { 828 t = (low+high)/2; 829 if (rp[t] > bcol) high = t; 830 else low = t; 831 } 832 for (i=low; i<high; i++) { 833 /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */ 834 if (rp[i] > bcol) break; 835 if (rp[i] == bcol) { 836 bap = ap + bs2*i + bs*cidx + ridx; 837 if (is == ADD_VALUES) *bap += value; 838 else *bap = value; 839 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 840 if (brow == bcol && ridx < cidx){ 841 bap = ap + bs2*i + bs*ridx + cidx; 842 if (is == ADD_VALUES) *bap += value; 843 else *bap = value; 844 } 845 goto noinsert1; 846 } 847 } 848 849 if (nonew == 1) goto noinsert1; 850 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 851 if (nrow >= rmax) { 852 /* there is no extra room in row, therefore enlarge */ 853 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 854 MatScalar *new_a; 855 856 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 857 858 /* Malloc new storage space */ 859 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 860 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 861 new_j = (int*)(new_a + bs2*new_nz); 862 new_i = new_j + new_nz; 863 864 /* copy over old data into new slots */ 865 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 866 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 867 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 868 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 869 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 870 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 871 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 872 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 873 /* free up old matrix storage */ 874 ierr = PetscFree(a->a);CHKERRQ(ierr); 875 if (!a->singlemalloc) { 876 ierr = PetscFree(a->i);CHKERRQ(ierr); 877 ierr = PetscFree(a->j);CHKERRQ(ierr); 878 } 879 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 880 a->singlemalloc = PETSC_TRUE; 881 882 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 883 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 884 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 885 a->s_maxnz += bs2*CHUNKSIZE; 886 a->reallocs++; 887 a->s_nz++; 888 } 889 890 N = nrow++ - 1; 891 /* shift up all the later entries in this row */ 892 for (ii=N; ii>=i; ii--) { 893 rp[ii+1] = rp[ii]; 894 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 895 } 896 if (N>=i) { 897 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 898 } 899 rp[i] = bcol; 900 ap[bs2*i + bs*cidx + ridx] = value; 901 noinsert1:; 902 low = i; 903 /* } */ 904 } 905 } /* end of if .. if.. */ 906 } /* end of loop over added columns */ 907 ailen[brow] = nrow; 908 } /* end of loop over added rows */ 909 910 PetscFunctionReturn(0); 911 } 912 913 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,PetscReal,Mat*); 914 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,PetscReal); 915 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS*,int); 916 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 917 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**); 918 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec); 919 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec); 920 extern int MatScale_SeqSBAIJ(PetscScalar*,Mat); 921 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 922 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 923 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec); 924 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 925 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 926 extern int MatZeroEntries_SeqSBAIJ(Mat); 927 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec); 928 929 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 930 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 931 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 932 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 933 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 934 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 935 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 936 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 937 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec); 938 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec); 939 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec); 940 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec); 941 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec); 942 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec); 943 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec); 944 945 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 946 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 947 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 948 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 949 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 950 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 951 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 952 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 953 954 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*); 955 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*); 956 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*); 957 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*); 958 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*); 959 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*); 960 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*); 961 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*); 962 963 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 964 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 965 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 966 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 967 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 968 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 969 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 970 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 971 972 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 973 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 974 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 975 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 976 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 977 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 978 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 979 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 980 981 #ifdef HAVE_MatICCFactor 982 /* modefied from MatILUFactor_SeqSBAIJ, needs further work! */ 983 #undef __FUNCT__ 984 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 985 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,PetscReal fill,int level) 986 { 987 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 988 Mat outA; 989 int ierr; 990 PetscTruth row_identity,col_identity; 991 992 PetscFunctionBegin; 993 /* 994 if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 995 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 996 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 997 if (!row_identity || !col_identity) { 998 SETERRQ(1,"Row and column permutations must be identity for in-place ICC"); 999 } 1000 */ 1001 1002 outA = inA; 1003 inA->factor = FACTOR_CHOLESKY; 1004 1005 if (!a->diag) { 1006 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 1007 } 1008 /* 1009 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1010 for ILU(0) factorization with natural ordering 1011 */ 1012 switch (a->bs) { 1013 case 1: 1014 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1015 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1016 PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1017 case 2: 1018 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1019 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1020 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1021 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1022 break; 1023 case 3: 1024 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1025 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1026 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1027 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1028 break; 1029 case 4: 1030 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1031 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1032 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1033 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1034 break; 1035 case 5: 1036 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1037 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1038 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1039 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1040 break; 1041 case 6: 1042 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1043 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1044 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1045 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1046 break; 1047 case 7: 1048 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1049 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1050 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1051 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1052 break; 1053 default: 1054 a->row = row; 1055 a->icol = col; 1056 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1057 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1058 1059 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1060 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1061 PetscLogObjectParent(inA,a->icol); 1062 1063 if (!a->solve_work) { 1064 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1065 PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar)); 1066 } 1067 } 1068 1069 ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr); 1070 1071 PetscFunctionReturn(0); 1072 } 1073 #endif 1074 1075 #undef __FUNCT__ 1076 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1077 int MatPrintHelp_SeqSBAIJ(Mat A) 1078 { 1079 static PetscTruth called = PETSC_FALSE; 1080 MPI_Comm comm = A->comm; 1081 int ierr; 1082 1083 PetscFunctionBegin; 1084 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1085 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1086 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1087 PetscFunctionReturn(0); 1088 } 1089 1090 EXTERN_C_BEGIN 1091 #undef __FUNCT__ 1092 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1093 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices) 1094 { 1095 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1096 int i,nz,n; 1097 1098 PetscFunctionBegin; 1099 nz = baij->s_maxnz; 1100 n = mat->n; 1101 for (i=0; i<nz; i++) { 1102 baij->j[i] = indices[i]; 1103 } 1104 baij->s_nz = nz; 1105 for (i=0; i<n; i++) { 1106 baij->ilen[i] = baij->imax[i]; 1107 } 1108 1109 PetscFunctionReturn(0); 1110 } 1111 EXTERN_C_END 1112 1113 #undef __FUNCT__ 1114 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1115 /*@ 1116 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1117 in the matrix. 1118 1119 Input Parameters: 1120 + mat - the SeqSBAIJ matrix 1121 - indices - the column indices 1122 1123 Level: advanced 1124 1125 Notes: 1126 This can be called if you have precomputed the nonzero structure of the 1127 matrix and want to provide it to the matrix object to improve the performance 1128 of the MatSetValues() operation. 1129 1130 You MUST have set the correct numbers of nonzeros per row in the call to 1131 MatCreateSeqSBAIJ(). 1132 1133 MUST be called before any calls to MatSetValues() 1134 1135 .seealso: MatCreateSeqSBAIJ 1136 @*/ 1137 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices) 1138 { 1139 int ierr,(*f)(Mat,int *); 1140 1141 PetscFunctionBegin; 1142 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1143 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1144 if (f) { 1145 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1146 } else { 1147 SETERRQ(1,"Wrong type of matrix to set column indices"); 1148 } 1149 PetscFunctionReturn(0); 1150 } 1151 1152 #undef __FUNCT__ 1153 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1154 int MatSetUpPreallocation_SeqSBAIJ(Mat A) 1155 { 1156 int ierr; 1157 1158 PetscFunctionBegin; 1159 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1160 PetscFunctionReturn(0); 1161 } 1162 1163 #include "petscblaslapack.h" 1164 #undef __FUNCT__ 1165 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1166 int MatAXPY_SeqSBAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str) 1167 { 1168 int ierr,one=1; 1169 Mat_SeqSBAIJ *x = (Mat_SeqSBAIJ *)X->data,*y = (Mat_SeqSBAIJ *)Y->data; 1170 1171 PetscFunctionBegin; 1172 if (str == SAME_NONZERO_PATTERN) { 1173 BLaxpy_(&x->s_nz,a,x->a,&one,y->a,&one); 1174 } else { 1175 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1176 } 1177 PetscFunctionReturn(0); 1178 } 1179 1180 /* -------------------------------------------------------------------*/ 1181 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1182 MatGetRow_SeqSBAIJ, 1183 MatRestoreRow_SeqSBAIJ, 1184 MatMult_SeqSBAIJ_N, 1185 MatMultAdd_SeqSBAIJ_N, 1186 MatMultTranspose_SeqSBAIJ, 1187 MatMultTransposeAdd_SeqSBAIJ, 1188 MatSolve_SeqSBAIJ_N, 1189 0, 1190 0, 1191 0, 1192 0, 1193 MatCholeskyFactor_SeqSBAIJ, 1194 MatRelax_SeqSBAIJ, 1195 MatTranspose_SeqSBAIJ, 1196 MatGetInfo_SeqSBAIJ, 1197 MatEqual_SeqSBAIJ, 1198 MatGetDiagonal_SeqSBAIJ, 1199 MatDiagonalScale_SeqSBAIJ, 1200 MatNorm_SeqSBAIJ, 1201 0, 1202 MatAssemblyEnd_SeqSBAIJ, 1203 0, 1204 MatSetOption_SeqSBAIJ, 1205 MatZeroEntries_SeqSBAIJ, 1206 MatZeroRows_SeqSBAIJ, 1207 0, 1208 0, 1209 MatCholeskyFactorSymbolic_SeqSBAIJ, 1210 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1211 MatSetUpPreallocation_SeqSBAIJ, 1212 0, 1213 MatICCFactorSymbolic_SeqSBAIJ, 1214 0, 1215 0, 1216 MatDuplicate_SeqSBAIJ, 1217 0, 1218 0, 1219 0, 1220 0, 1221 MatAXPY_SeqSBAIJ, 1222 MatGetSubMatrices_SeqSBAIJ, 1223 MatIncreaseOverlap_SeqSBAIJ, 1224 MatGetValues_SeqSBAIJ, 1225 0, 1226 MatPrintHelp_SeqSBAIJ, 1227 MatScale_SeqSBAIJ, 1228 0, 1229 0, 1230 0, 1231 MatGetBlockSize_SeqSBAIJ, 1232 MatGetRowIJ_SeqSBAIJ, 1233 MatRestoreRowIJ_SeqSBAIJ, 1234 0, 1235 0, 1236 0, 1237 0, 1238 0, 1239 0, 1240 MatSetValuesBlocked_SeqSBAIJ, 1241 MatGetSubMatrix_SeqSBAIJ, 1242 0, 1243 0, 1244 MatGetPetscMaps_Petsc, 1245 0, 1246 0, 1247 0, 1248 0, 1249 0, 1250 0, 1251 MatGetRowMax_SeqSBAIJ}; 1252 1253 EXTERN_C_BEGIN 1254 #undef __FUNCT__ 1255 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1256 int MatStoreValues_SeqSBAIJ(Mat mat) 1257 { 1258 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1259 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1260 int ierr; 1261 1262 PetscFunctionBegin; 1263 if (aij->nonew != 1) { 1264 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1265 } 1266 1267 /* allocate space for values if not already there */ 1268 if (!aij->saved_values) { 1269 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1270 } 1271 1272 /* copy values over */ 1273 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1274 PetscFunctionReturn(0); 1275 } 1276 EXTERN_C_END 1277 1278 EXTERN_C_BEGIN 1279 #undef __FUNCT__ 1280 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1281 int MatRetrieveValues_SeqSBAIJ(Mat mat) 1282 { 1283 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1284 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1285 1286 PetscFunctionBegin; 1287 if (aij->nonew != 1) { 1288 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1289 } 1290 if (!aij->saved_values) { 1291 SETERRQ(1,"Must call MatStoreValues(A);first"); 1292 } 1293 1294 /* copy values over */ 1295 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1296 PetscFunctionReturn(0); 1297 } 1298 EXTERN_C_END 1299 1300 EXTERN_C_BEGIN 1301 #undef __FUNCT__ 1302 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1303 int MatCreate_SeqSBAIJ(Mat B) 1304 { 1305 Mat_SeqSBAIJ *b; 1306 int ierr,size; 1307 1308 PetscFunctionBegin; 1309 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1310 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1311 B->m = B->M = PetscMax(B->m,B->M); 1312 B->n = B->N = PetscMax(B->n,B->N); 1313 1314 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1315 B->data = (void*)b; 1316 ierr = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1317 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1318 B->ops->destroy = MatDestroy_SeqSBAIJ; 1319 B->ops->view = MatView_SeqSBAIJ; 1320 B->factor = 0; 1321 B->lupivotthreshold = 1.0; 1322 B->mapping = 0; 1323 b->row = 0; 1324 b->icol = 0; 1325 b->reallocs = 0; 1326 b->saved_values = 0; 1327 1328 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1329 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1330 1331 b->sorted = PETSC_FALSE; 1332 b->roworiented = PETSC_TRUE; 1333 b->nonew = 0; 1334 b->diag = 0; 1335 b->solve_work = 0; 1336 b->mult_work = 0; 1337 B->spptr = 0; 1338 b->keepzeroedrows = PETSC_FALSE; 1339 1340 b->inew = 0; 1341 b->jnew = 0; 1342 b->anew = 0; 1343 b->a2anew = 0; 1344 b->permute = PETSC_FALSE; 1345 1346 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1347 "MatStoreValues_SeqSBAIJ", 1348 (void*)MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1349 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1350 "MatRetrieveValues_SeqSBAIJ", 1351 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1352 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1353 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1354 (void*)MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1355 PetscFunctionReturn(0); 1356 } 1357 EXTERN_C_END 1358 1359 #undef __FUNCT__ 1360 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1361 /*@C 1362 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1363 compressed row) format. For good matrix assembly performance the 1364 user should preallocate the matrix storage by setting the parameter nz 1365 (or the array nnz). By setting these parameters accurately, performance 1366 during matrix assembly can be increased by more than a factor of 50. 1367 1368 Collective on Mat 1369 1370 Input Parameters: 1371 + A - the symmetric matrix 1372 . bs - size of block 1373 . nz - number of block nonzeros per block row (same for all rows) 1374 - nnz - array containing the number of block nonzeros in the upper triangular plus 1375 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1376 1377 Options Database Keys: 1378 . -mat_no_unroll - uses code that does not unroll the loops in the 1379 block calculations (much slower) 1380 . -mat_block_size - size of the blocks to use 1381 1382 Level: intermediate 1383 1384 Notes: 1385 Specify the preallocated storage with either nz or nnz (not both). 1386 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1387 allocation. For additional details, see the users manual chapter on 1388 matrices. 1389 1390 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1391 @*/ 1392 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1393 { 1394 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1395 int i,len,ierr,mbs,bs2; 1396 PetscTruth flg; 1397 int s_nz; 1398 1399 PetscFunctionBegin; 1400 B->preallocated = PETSC_TRUE; 1401 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1402 mbs = B->m/bs; 1403 bs2 = bs*bs; 1404 1405 if (mbs*bs != B->m) { 1406 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1407 } 1408 1409 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1410 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1411 if (nnz) { 1412 for (i=0; i<mbs; i++) { 1413 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1414 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs); 1415 } 1416 } 1417 1418 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1419 if (!flg) { 1420 switch (bs) { 1421 case 1: 1422 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1423 B->ops->solve = MatSolve_SeqSBAIJ_1; 1424 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_1; 1425 B->ops->mult = MatMult_SeqSBAIJ_1; 1426 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1427 break; 1428 case 2: 1429 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1430 B->ops->solve = MatSolve_SeqSBAIJ_2; 1431 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_2; 1432 B->ops->mult = MatMult_SeqSBAIJ_2; 1433 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1434 break; 1435 case 3: 1436 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1437 B->ops->solve = MatSolve_SeqSBAIJ_3; 1438 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_3; 1439 B->ops->mult = MatMult_SeqSBAIJ_3; 1440 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1441 break; 1442 case 4: 1443 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1444 B->ops->solve = MatSolve_SeqSBAIJ_4; 1445 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_4; 1446 B->ops->mult = MatMult_SeqSBAIJ_4; 1447 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1448 break; 1449 case 5: 1450 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1451 B->ops->solve = MatSolve_SeqSBAIJ_5; 1452 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_5; 1453 B->ops->mult = MatMult_SeqSBAIJ_5; 1454 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1455 break; 1456 case 6: 1457 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1458 B->ops->solve = MatSolve_SeqSBAIJ_6; 1459 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_6; 1460 B->ops->mult = MatMult_SeqSBAIJ_6; 1461 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1462 break; 1463 case 7: 1464 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1465 B->ops->solve = MatSolve_SeqSBAIJ_7; 1466 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_7; 1467 B->ops->mult = MatMult_SeqSBAIJ_7; 1468 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1469 break; 1470 } 1471 } 1472 1473 b->mbs = mbs; 1474 b->nbs = mbs; 1475 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1476 if (!nnz) { 1477 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1478 else if (nz <= 0) nz = 1; 1479 for (i=0; i<mbs; i++) { 1480 b->imax[i] = nz; 1481 } 1482 nz = nz*mbs; /* total nz */ 1483 } else { 1484 nz = 0; 1485 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1486 } 1487 /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1488 s_nz = nz; 1489 1490 /* allocate the matrix space */ 1491 len = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1492 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1493 ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1494 b->j = (int*)(b->a + s_nz*bs2); 1495 ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr); 1496 b->i = b->j + s_nz; 1497 b->singlemalloc = PETSC_TRUE; 1498 1499 /* pointer to beginning of each row */ 1500 b->i[0] = 0; 1501 for (i=1; i<mbs+1; i++) { 1502 b->i[i] = b->i[i-1] + b->imax[i-1]; 1503 } 1504 1505 /* b->ilen will count nonzeros in each block row so far. */ 1506 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1507 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1508 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1509 1510 b->bs = bs; 1511 b->bs2 = bs2; 1512 b->s_nz = 0; 1513 b->s_maxnz = s_nz*bs2; 1514 1515 b->inew = 0; 1516 b->jnew = 0; 1517 b->anew = 0; 1518 b->a2anew = 0; 1519 b->permute = PETSC_FALSE; 1520 PetscFunctionReturn(0); 1521 } 1522 1523 1524 #undef __FUNCT__ 1525 #define __FUNCT__ "MatCreateSeqSBAIJ" 1526 /*@C 1527 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1528 compressed row) format. For good matrix assembly performance the 1529 user should preallocate the matrix storage by setting the parameter nz 1530 (or the array nnz). By setting these parameters accurately, performance 1531 during matrix assembly can be increased by more than a factor of 50. 1532 1533 Collective on MPI_Comm 1534 1535 Input Parameters: 1536 + comm - MPI communicator, set to PETSC_COMM_SELF 1537 . bs - size of block 1538 . m - number of rows, or number of columns 1539 . nz - number of block nonzeros per block row (same for all rows) 1540 - nnz - array containing the number of block nonzeros in the upper triangular plus 1541 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1542 1543 Output Parameter: 1544 . A - the symmetric matrix 1545 1546 Options Database Keys: 1547 . -mat_no_unroll - uses code that does not unroll the loops in the 1548 block calculations (much slower) 1549 . -mat_block_size - size of the blocks to use 1550 1551 Level: intermediate 1552 1553 Notes: 1554 1555 Specify the preallocated storage with either nz or nnz (not both). 1556 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1557 allocation. For additional details, see the users manual chapter on 1558 matrices. 1559 1560 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1561 @*/ 1562 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1563 { 1564 int ierr; 1565 1566 PetscFunctionBegin; 1567 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1568 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1569 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1570 PetscFunctionReturn(0); 1571 } 1572 1573 #undef __FUNCT__ 1574 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1575 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1576 { 1577 Mat C; 1578 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1579 int i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr; 1580 1581 PetscFunctionBegin; 1582 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1583 1584 *B = 0; 1585 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1586 ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr); 1587 c = (Mat_SeqSBAIJ*)C->data; 1588 1589 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1590 C->preallocated = PETSC_TRUE; 1591 C->factor = A->factor; 1592 c->row = 0; 1593 c->icol = 0; 1594 c->saved_values = 0; 1595 c->keepzeroedrows = a->keepzeroedrows; 1596 C->assembled = PETSC_TRUE; 1597 1598 c->bs = a->bs; 1599 c->bs2 = a->bs2; 1600 c->mbs = a->mbs; 1601 c->nbs = a->nbs; 1602 1603 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1604 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1605 for (i=0; i<mbs; i++) { 1606 c->imax[i] = a->imax[i]; 1607 c->ilen[i] = a->ilen[i]; 1608 } 1609 1610 /* allocate the matrix space */ 1611 c->singlemalloc = PETSC_TRUE; 1612 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1613 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1614 c->j = (int*)(c->a + nz*bs2); 1615 c->i = c->j + nz; 1616 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1617 if (mbs > 0) { 1618 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1619 if (cpvalues == MAT_COPY_VALUES) { 1620 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1621 } else { 1622 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1623 } 1624 } 1625 1626 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1627 c->sorted = a->sorted; 1628 c->roworiented = a->roworiented; 1629 c->nonew = a->nonew; 1630 1631 if (a->diag) { 1632 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1633 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1634 for (i=0; i<mbs; i++) { 1635 c->diag[i] = a->diag[i]; 1636 } 1637 } else c->diag = 0; 1638 c->s_nz = a->s_nz; 1639 c->s_maxnz = a->s_maxnz; 1640 c->solve_work = 0; 1641 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1642 c->mult_work = 0; 1643 *B = C; 1644 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1645 PetscFunctionReturn(0); 1646 } 1647 1648 EXTERN_C_BEGIN 1649 #undef __FUNCT__ 1650 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1651 int MatLoad_SeqSBAIJ(PetscViewer viewer,MatType type,Mat *A) 1652 { 1653 Mat_SeqSBAIJ *a; 1654 Mat B; 1655 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1656 int *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1657 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1658 int *masked,nmask,tmp,bs2,ishift; 1659 PetscScalar *aa; 1660 MPI_Comm comm = ((PetscObject)viewer)->comm; 1661 1662 PetscFunctionBegin; 1663 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1664 bs2 = bs*bs; 1665 1666 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1667 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1668 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1669 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1670 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1671 M = header[1]; N = header[2]; nz = header[3]; 1672 1673 if (header[3] < 0) { 1674 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1675 } 1676 1677 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1678 1679 /* 1680 This code adds extra rows to make sure the number of rows is 1681 divisible by the blocksize 1682 */ 1683 mbs = M/bs; 1684 extra_rows = bs - M + bs*(mbs); 1685 if (extra_rows == bs) extra_rows = 0; 1686 else mbs++; 1687 if (extra_rows) { 1688 PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"); 1689 } 1690 1691 /* read in row lengths */ 1692 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1693 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1694 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1695 1696 /* read in column indices */ 1697 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1698 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1699 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1700 1701 /* loop over row lengths determining block row lengths */ 1702 ierr = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr); 1703 ierr = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1704 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1705 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1706 masked = mask + mbs; 1707 rowcount = 0; nzcount = 0; 1708 for (i=0; i<mbs; i++) { 1709 nmask = 0; 1710 for (j=0; j<bs; j++) { 1711 kmax = rowlengths[rowcount]; 1712 for (k=0; k<kmax; k++) { 1713 tmp = jj[nzcount++]/bs; /* block col. index */ 1714 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1715 } 1716 rowcount++; 1717 } 1718 s_browlengths[i] += nmask; 1719 1720 /* zero out the mask elements we set */ 1721 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1722 } 1723 1724 /* create our matrix */ 1725 ierr = MatCreateSeqSBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,s_browlengths,A);CHKERRQ(ierr); 1726 B = *A; 1727 a = (Mat_SeqSBAIJ*)B->data; 1728 1729 /* set matrix "i" values */ 1730 a->i[0] = 0; 1731 for (i=1; i<= mbs; i++) { 1732 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1733 a->ilen[i-1] = s_browlengths[i-1]; 1734 } 1735 a->s_nz = a->i[mbs]; 1736 1737 /* read in nonzero values */ 1738 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1739 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1740 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1741 1742 /* set "a" and "j" values into matrix */ 1743 nzcount = 0; jcount = 0; 1744 for (i=0; i<mbs; i++) { 1745 nzcountb = nzcount; 1746 nmask = 0; 1747 for (j=0; j<bs; j++) { 1748 kmax = rowlengths[i*bs+j]; 1749 for (k=0; k<kmax; k++) { 1750 tmp = jj[nzcount++]/bs; /* block col. index */ 1751 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1752 } 1753 } 1754 /* sort the masked values */ 1755 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1756 1757 /* set "j" values into matrix */ 1758 maskcount = 1; 1759 for (j=0; j<nmask; j++) { 1760 a->j[jcount++] = masked[j]; 1761 mask[masked[j]] = maskcount++; 1762 } 1763 1764 /* set "a" values into matrix */ 1765 ishift = bs2*a->i[i]; 1766 for (j=0; j<bs; j++) { 1767 kmax = rowlengths[i*bs+j]; 1768 for (k=0; k<kmax; k++) { 1769 tmp = jj[nzcountb]/bs ; /* block col. index */ 1770 if (tmp >= i){ 1771 block = mask[tmp] - 1; 1772 point = jj[nzcountb] - bs*tmp; 1773 idx = ishift + bs2*block + j + bs*point; 1774 a->a[idx] = aa[nzcountb]; 1775 } 1776 nzcountb++; 1777 } 1778 } 1779 /* zero out the mask elements we set */ 1780 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1781 } 1782 if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1783 1784 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1785 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1786 ierr = PetscFree(aa);CHKERRQ(ierr); 1787 ierr = PetscFree(jj);CHKERRQ(ierr); 1788 ierr = PetscFree(mask);CHKERRQ(ierr); 1789 1790 B->assembled = PETSC_TRUE; 1791 ierr = MatView_Private(B);CHKERRQ(ierr); 1792 PetscFunctionReturn(0); 1793 } 1794 EXTERN_C_END 1795 1796 #undef __FUNCT__ 1797 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1798 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1799 { 1800 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1801 MatScalar *aa=a->a,*v,*v1; 1802 PetscScalar *x,*b,*t,sum,d; 1803 int m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr; 1804 int nz,nz1,*vj,*vj1,i; 1805 1806 PetscFunctionBegin; 1807 its = its*lits; 1808 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1809 1810 if (bs > 1) 1811 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1812 1813 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1814 if (xx != bb) { 1815 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1816 } else { 1817 b = x; 1818 } 1819 1820 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1821 1822 if (flag & SOR_ZERO_INITIAL_GUESS) { 1823 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1824 for (i=0; i<m; i++) 1825 t[i] = b[i]; 1826 1827 for (i=0; i<m; i++){ 1828 d = *(aa + ai[i]); /* diag[i] */ 1829 v = aa + ai[i] + 1; 1830 vj = aj + ai[i] + 1; 1831 nz = ai[i+1] - ai[i] - 1; 1832 x[i] = omega*t[i]/d; 1833 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 1834 PetscLogFlops(2*nz-1); 1835 } 1836 } 1837 1838 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1839 for (i=0; i<m; i++) 1840 t[i] = b[i]; 1841 1842 for (i=0; i<m-1; i++){ /* update rhs */ 1843 v = aa + ai[i] + 1; 1844 vj = aj + ai[i] + 1; 1845 nz = ai[i+1] - ai[i] - 1; 1846 while (nz--) t[*vj++] -= x[i]*(*v++); 1847 PetscLogFlops(2*nz-1); 1848 } 1849 for (i=m-1; i>=0; i--){ 1850 d = *(aa + ai[i]); 1851 v = aa + ai[i] + 1; 1852 vj = aj + ai[i] + 1; 1853 nz = ai[i+1] - ai[i] - 1; 1854 sum = t[i]; 1855 while (nz--) sum -= x[*vj++]*(*v++); 1856 PetscLogFlops(2*nz-1); 1857 x[i] = (1-omega)*x[i] + omega*sum/d; 1858 } 1859 } 1860 its--; 1861 } 1862 1863 while (its--) { 1864 /* 1865 forward sweep: 1866 for i=0,...,m-1: 1867 sum[i] = (b[i] - U(i,:)x )/d[i]; 1868 x[i] = (1-omega)x[i] + omega*sum[i]; 1869 b = b - x[i]*U^T(i,:); 1870 1871 */ 1872 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1873 for (i=0; i<m; i++) 1874 t[i] = b[i]; 1875 1876 for (i=0; i<m; i++){ 1877 d = *(aa + ai[i]); /* diag[i] */ 1878 v = aa + ai[i] + 1; v1=v; 1879 vj = aj + ai[i] + 1; vj1=vj; 1880 nz = ai[i+1] - ai[i] - 1; nz1=nz; 1881 sum = t[i]; 1882 while (nz1--) sum -= (*v1++)*x[*vj1++]; 1883 x[i] = (1-omega)*x[i] + omega*sum/d; 1884 while (nz--) t[*vj++] -= x[i]*(*v++); 1885 PetscLogFlops(4*nz-2); 1886 } 1887 } 1888 1889 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1890 /* 1891 backward sweep: 1892 b = b - x[i]*U^T(i,:), i=0,...,n-2 1893 for i=m-1,...,0: 1894 sum[i] = (b[i] - U(i,:)x )/d[i]; 1895 x[i] = (1-omega)x[i] + omega*sum[i]; 1896 */ 1897 for (i=0; i<m; i++) 1898 t[i] = b[i]; 1899 1900 for (i=0; i<m-1; i++){ /* update rhs */ 1901 v = aa + ai[i] + 1; 1902 vj = aj + ai[i] + 1; 1903 nz = ai[i+1] - ai[i] - 1; 1904 while (nz--) t[*vj++] -= x[i]*(*v++); 1905 PetscLogFlops(2*nz-1); 1906 } 1907 for (i=m-1; i>=0; i--){ 1908 d = *(aa + ai[i]); 1909 v = aa + ai[i] + 1; 1910 vj = aj + ai[i] + 1; 1911 nz = ai[i+1] - ai[i] - 1; 1912 sum = t[i]; 1913 while (nz--) sum -= x[*vj++]*(*v++); 1914 PetscLogFlops(2*nz-1); 1915 x[i] = (1-omega)*x[i] + omega*sum/d; 1916 } 1917 } 1918 } 1919 1920 ierr = PetscFree(t); CHKERRQ(ierr); 1921 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1922 if (bb != xx) { 1923 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1924 } 1925 1926 PetscFunctionReturn(0); 1927 } 1928 1929 1930 1931 1932 1933 1934