1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/ 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "src/mat/impls/sbaij/seq/sbaij.h" 11 12 #define CHUNKSIZE 10 13 14 /* 15 Checks for missing diagonals 16 */ 17 #undef __FUNCT__ 18 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 19 int MatMissingDiagonal_SeqSBAIJ(Mat A) 20 { 21 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 22 int *diag,*jj = a->j,i,ierr; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for (i=0; i<a->mbs; i++) { 28 if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i); 29 } 30 PetscFunctionReturn(0); 31 } 32 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 35 int MatMarkDiagonal_SeqSBAIJ(Mat A) 36 { 37 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 38 int i,mbs = a->mbs,ierr; 39 40 PetscFunctionBegin; 41 if (a->diag) PetscFunctionReturn(0); 42 43 ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr); 44 PetscLogObjectMemory(A,(mbs+1)*sizeof(int)); 45 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 #undef __FUNCT__ 50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 51 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 52 { 53 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 54 int n = a->mbs,i; 55 56 PetscFunctionBegin; 57 *nn = n; 58 if (!ia) PetscFunctionReturn(0); 59 60 if (oshift == 1) { 61 /* temporarily add 1 to i and j indices */ 62 int s_nz = a->i[n]; 63 for (i=0; i<s_nz; i++) a->j[i]++; 64 for (i=0; i<n+1; i++) a->i[i]++; 65 *ia = a->i; *ja = a->j; 66 } else { 67 *ia = a->i; *ja = a->j; 68 } 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 74 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 75 { 76 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 77 int i,n = a->mbs; 78 79 PetscFunctionBegin; 80 if (!ia) PetscFunctionReturn(0); 81 82 if (oshift == 1) { 83 int s_nz = a->i[n]-1; 84 for (i=0; i<s_nz; i++) a->j[i]--; 85 for (i=0; i<n+1; i++) a->i[i]--; 86 } 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ" 92 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs) 93 { 94 Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data; 95 96 PetscFunctionBegin; 97 *bs = sbaij->bs; 98 PetscFunctionReturn(0); 99 } 100 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 103 int MatDestroy_SeqSBAIJ(Mat A) 104 { 105 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 106 int ierr; 107 108 PetscFunctionBegin; 109 #if defined(PETSC_USE_LOG) 110 PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz); 111 #endif 112 ierr = PetscFree(a->a);CHKERRQ(ierr); 113 if (!a->singlemalloc) { 114 ierr = PetscFree(a->i);CHKERRQ(ierr); 115 ierr = PetscFree(a->j);CHKERRQ(ierr); 116 } 117 if (a->row) { 118 ierr = ISDestroy(a->row);CHKERRQ(ierr); 119 } 120 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 121 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 122 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 123 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 124 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 125 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 126 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 127 128 if (a->inew){ 129 ierr = PetscFree(a->inew);CHKERRQ(ierr); 130 a->inew = 0; 131 } 132 ierr = PetscFree(a);CHKERRQ(ierr); 133 PetscFunctionReturn(0); 134 } 135 136 #undef __FUNCT__ 137 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 138 int MatSetOption_SeqSBAIJ(Mat A,MatOption op) 139 { 140 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 141 142 PetscFunctionBegin; 143 switch (op) { 144 case MAT_ROW_ORIENTED: 145 a->roworiented = PETSC_TRUE; 146 break; 147 case MAT_COLUMN_ORIENTED: 148 a->roworiented = PETSC_FALSE; 149 break; 150 case MAT_COLUMNS_SORTED: 151 a->sorted = PETSC_TRUE; 152 break; 153 case MAT_COLUMNS_UNSORTED: 154 a->sorted = PETSC_FALSE; 155 break; 156 case MAT_KEEP_ZEROED_ROWS: 157 a->keepzeroedrows = PETSC_TRUE; 158 break; 159 case MAT_NO_NEW_NONZERO_LOCATIONS: 160 a->nonew = 1; 161 break; 162 case MAT_NEW_NONZERO_LOCATION_ERR: 163 a->nonew = -1; 164 break; 165 case MAT_NEW_NONZERO_ALLOCATION_ERR: 166 a->nonew = -2; 167 break; 168 case MAT_YES_NEW_NONZERO_LOCATIONS: 169 a->nonew = 0; 170 break; 171 case MAT_ROWS_SORTED: 172 case MAT_ROWS_UNSORTED: 173 case MAT_YES_NEW_DIAGONALS: 174 case MAT_IGNORE_OFF_PROC_ENTRIES: 175 case MAT_USE_HASH_TABLE: 176 case MAT_USE_SINGLE_PRECISION_SOLVES: 177 PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n"); 178 break; 179 case MAT_NO_NEW_DIAGONALS: 180 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 181 default: 182 SETERRQ(PETSC_ERR_SUP,"unknown option"); 183 } 184 PetscFunctionReturn(0); 185 } 186 187 #undef __FUNCT__ 188 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 189 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v) 190 { 191 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 192 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr; 193 MatScalar *aa,*aa_i; 194 PetscScalar *v_i; 195 196 PetscFunctionBegin; 197 bs = a->bs; 198 ai = a->i; 199 aj = a->j; 200 aa = a->a; 201 bs2 = a->bs2; 202 203 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 204 205 bn = row/bs; /* Block number */ 206 bp = row % bs; /* Block position */ 207 M = ai[bn+1] - ai[bn]; 208 *ncols = bs*M; 209 210 if (v) { 211 *v = 0; 212 if (*ncols) { 213 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 214 for (i=0; i<M; i++) { /* for each block in the block row */ 215 v_i = *v + i*bs; 216 aa_i = aa + bs2*(ai[bn] + i); 217 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 218 } 219 } 220 } 221 222 if (cols) { 223 *cols = 0; 224 if (*ncols) { 225 ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr); 226 for (i=0; i<M; i++) { /* for each block in the block row */ 227 cols_i = *cols + i*bs; 228 itmp = bs*aj[ai[bn] + i]; 229 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 230 } 231 } 232 } 233 234 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 235 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 236 #ifdef column_search 237 v_i = *v + M*bs; 238 cols_i = *cols + M*bs; 239 for (i=0; i<bn; i++){ /* for each block row */ 240 M = ai[i+1] - ai[i]; 241 for (j=0; j<M; j++){ 242 itmp = aj[ai[i] + j]; /* block column value */ 243 if (itmp == bn){ 244 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 245 for (k=0; k<bs; k++) { 246 *cols_i++ = i*bs+k; 247 *v_i++ = aa_i[k]; 248 } 249 *ncols += bs; 250 break; 251 } 252 } 253 } 254 #endif 255 256 PetscFunctionReturn(0); 257 } 258 259 #undef __FUNCT__ 260 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 261 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 262 { 263 int ierr; 264 265 PetscFunctionBegin; 266 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 267 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 268 PetscFunctionReturn(0); 269 } 270 271 #undef __FUNCT__ 272 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 273 int MatTranspose_SeqSBAIJ(Mat A,Mat *B) 274 { 275 int ierr; 276 PetscFunctionBegin; 277 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 278 PetscFunctionReturn(0); 279 } 280 281 #undef __FUNCT__ 282 #define __FUNCT__ "MatView_SeqSBAIJ_Binary" 283 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer) 284 { 285 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 286 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 287 PetscScalar *aa; 288 FILE *file; 289 290 PetscFunctionBegin; 291 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 292 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 293 col_lens[0] = MAT_FILE_COOKIE; 294 295 col_lens[1] = A->m; 296 col_lens[2] = A->m; 297 col_lens[3] = a->s_nz*bs2; 298 299 /* store lengths of each row and write (including header) to file */ 300 count = 0; 301 for (i=0; i<a->mbs; i++) { 302 for (j=0; j<bs; j++) { 303 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 304 } 305 } 306 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 307 ierr = PetscFree(col_lens);CHKERRQ(ierr); 308 309 /* store column indices (zero start index) */ 310 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 311 count = 0; 312 for (i=0; i<a->mbs; i++) { 313 for (j=0; j<bs; j++) { 314 for (k=a->i[i]; k<a->i[i+1]; k++) { 315 for (l=0; l<bs; l++) { 316 jj[count++] = bs*a->j[k] + l; 317 } 318 } 319 } 320 } 321 ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr); 322 ierr = PetscFree(jj);CHKERRQ(ierr); 323 324 /* store nonzero values */ 325 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 326 count = 0; 327 for (i=0; i<a->mbs; i++) { 328 for (j=0; j<bs; j++) { 329 for (k=a->i[i]; k<a->i[i+1]; k++) { 330 for (l=0; l<bs; l++) { 331 aa[count++] = a->a[bs2*k + l*bs + j]; 332 } 333 } 334 } 335 } 336 ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr); 337 ierr = PetscFree(aa);CHKERRQ(ierr); 338 339 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 340 if (file) { 341 fprintf(file,"-matload_block_size %d\n",a->bs); 342 } 343 PetscFunctionReturn(0); 344 } 345 346 #undef __FUNCT__ 347 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 348 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 349 { 350 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 351 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 352 char *name; 353 PetscViewerFormat format; 354 355 PetscFunctionBegin; 356 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 357 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 358 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 359 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 360 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 361 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 362 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 363 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 364 for (i=0; i<a->mbs; i++) { 365 for (j=0; j<bs; j++) { 366 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 367 for (k=a->i[i]; k<a->i[i+1]; k++) { 368 for (l=0; l<bs; l++) { 369 #if defined(PETSC_USE_COMPLEX) 370 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 371 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 372 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 373 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 374 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 375 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 376 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 377 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 378 } 379 #else 380 if (a->a[bs2*k + l*bs + j] != 0.0) { 381 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 382 } 383 #endif 384 } 385 } 386 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 387 } 388 } 389 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 390 } else { 391 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 392 for (i=0; i<a->mbs; i++) { 393 for (j=0; j<bs; j++) { 394 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 395 for (k=a->i[i]; k<a->i[i+1]; k++) { 396 for (l=0; l<bs; l++) { 397 #if defined(PETSC_USE_COMPLEX) 398 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 399 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 400 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 401 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 402 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 403 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 404 } else { 405 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 406 } 407 #else 408 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 409 #endif 410 } 411 } 412 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 413 } 414 } 415 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 416 } 417 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 418 PetscFunctionReturn(0); 419 } 420 421 #undef __FUNCT__ 422 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 423 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 424 { 425 Mat A = (Mat) Aa; 426 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 427 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 428 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 429 MatScalar *aa; 430 MPI_Comm comm; 431 PetscViewer viewer; 432 433 PetscFunctionBegin; 434 /* 435 This is nasty. If this is called from an originally parallel matrix 436 then all processes call this,but only the first has the matrix so the 437 rest should return immediately. 438 */ 439 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 440 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 441 if (rank) PetscFunctionReturn(0); 442 443 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 444 445 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 446 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 447 448 /* loop over matrix elements drawing boxes */ 449 color = PETSC_DRAW_BLUE; 450 for (i=0,row=0; i<mbs; i++,row+=bs) { 451 for (j=a->i[i]; j<a->i[i+1]; j++) { 452 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 453 x_l = a->j[j]*bs; x_r = x_l + 1.0; 454 aa = a->a + j*bs2; 455 for (k=0; k<bs; k++) { 456 for (l=0; l<bs; l++) { 457 if (PetscRealPart(*aa++) >= 0.) continue; 458 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 459 } 460 } 461 } 462 } 463 color = PETSC_DRAW_CYAN; 464 for (i=0,row=0; i<mbs; i++,row+=bs) { 465 for (j=a->i[i]; j<a->i[i+1]; j++) { 466 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 467 x_l = a->j[j]*bs; x_r = x_l + 1.0; 468 aa = a->a + j*bs2; 469 for (k=0; k<bs; k++) { 470 for (l=0; l<bs; l++) { 471 if (PetscRealPart(*aa++) != 0.) continue; 472 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 473 } 474 } 475 } 476 } 477 478 color = PETSC_DRAW_RED; 479 for (i=0,row=0; i<mbs; i++,row+=bs) { 480 for (j=a->i[i]; j<a->i[i+1]; j++) { 481 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 482 x_l = a->j[j]*bs; x_r = x_l + 1.0; 483 aa = a->a + j*bs2; 484 for (k=0; k<bs; k++) { 485 for (l=0; l<bs; l++) { 486 if (PetscRealPart(*aa++) <= 0.) continue; 487 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 488 } 489 } 490 } 491 } 492 PetscFunctionReturn(0); 493 } 494 495 #undef __FUNCT__ 496 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 497 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 498 { 499 int ierr; 500 PetscReal xl,yl,xr,yr,w,h; 501 PetscDraw draw; 502 PetscTruth isnull; 503 504 PetscFunctionBegin; 505 506 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 507 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 508 509 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 510 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 511 xr += w; yr += h; xl = -w; yl = -h; 512 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 513 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 514 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 515 PetscFunctionReturn(0); 516 } 517 518 #undef __FUNCT__ 519 #define __FUNCT__ "MatView_SeqSBAIJ" 520 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 521 { 522 int ierr; 523 PetscTruth issocket,isascii,isbinary,isdraw; 524 525 PetscFunctionBegin; 526 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 527 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 528 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 529 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 530 if (issocket) { 531 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 532 } else if (isascii){ 533 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 534 } else if (isbinary) { 535 ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr); 536 } else if (isdraw) { 537 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 538 } else { 539 SETERRQ1(1,"Viewer type %s not supported by SeqSBAIJ matrices",((PetscObject)viewer)->type_name); 540 } 541 PetscFunctionReturn(0); 542 } 543 544 545 #undef __FUNCT__ 546 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 547 int MatGetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v) 548 { 549 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 550 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 551 int *ai = a->i,*ailen = a->ilen; 552 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 553 MatScalar *ap,*aa = a->a,zero = 0.0; 554 555 PetscFunctionBegin; 556 for (k=0; k<m; k++) { /* loop over rows */ 557 row = im[k]; brow = row/bs; 558 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 559 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 560 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 561 nrow = ailen[brow]; 562 for (l=0; l<n; l++) { /* loop over columns */ 563 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 564 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 565 col = in[l] ; 566 bcol = col/bs; 567 cidx = col%bs; 568 ridx = row%bs; 569 high = nrow; 570 low = 0; /* assume unsorted */ 571 while (high-low > 5) { 572 t = (low+high)/2; 573 if (rp[t] > bcol) high = t; 574 else low = t; 575 } 576 for (i=low; i<high; i++) { 577 if (rp[i] > bcol) break; 578 if (rp[i] == bcol) { 579 *v++ = ap[bs2*i+bs*cidx+ridx]; 580 goto finished; 581 } 582 } 583 *v++ = zero; 584 finished:; 585 } 586 } 587 PetscFunctionReturn(0); 588 } 589 590 591 #undef __FUNCT__ 592 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 593 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 594 { 595 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 596 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 597 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 598 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 599 PetscTruth roworiented=a->roworiented; 600 MatScalar *value = v,*ap,*aa = a->a,*bap; 601 602 PetscFunctionBegin; 603 if (roworiented) { 604 stepval = (n-1)*bs; 605 } else { 606 stepval = (m-1)*bs; 607 } 608 for (k=0; k<m; k++) { /* loop over added rows */ 609 row = im[k]; 610 if (row < 0) continue; 611 #if defined(PETSC_USE_BOPT_g) 612 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 613 #endif 614 rp = aj + ai[row]; 615 ap = aa + bs2*ai[row]; 616 rmax = imax[row]; 617 nrow = ailen[row]; 618 low = 0; 619 for (l=0; l<n; l++) { /* loop over added columns */ 620 if (in[l] < 0) continue; 621 #if defined(PETSC_USE_BOPT_g) 622 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 623 #endif 624 col = in[l]; 625 if (col < row) continue; /* ignore lower triangular block */ 626 if (roworiented) { 627 value = v + k*(stepval+bs)*bs + l*bs; 628 } else { 629 value = v + l*(stepval+bs)*bs + k*bs; 630 } 631 if (!sorted) low = 0; high = nrow; 632 while (high-low > 7) { 633 t = (low+high)/2; 634 if (rp[t] > col) high = t; 635 else low = t; 636 } 637 for (i=low; i<high; i++) { 638 if (rp[i] > col) break; 639 if (rp[i] == col) { 640 bap = ap + bs2*i; 641 if (roworiented) { 642 if (is == ADD_VALUES) { 643 for (ii=0; ii<bs; ii++,value+=stepval) { 644 for (jj=ii; jj<bs2; jj+=bs) { 645 bap[jj] += *value++; 646 } 647 } 648 } else { 649 for (ii=0; ii<bs; ii++,value+=stepval) { 650 for (jj=ii; jj<bs2; jj+=bs) { 651 bap[jj] = *value++; 652 } 653 } 654 } 655 } else { 656 if (is == ADD_VALUES) { 657 for (ii=0; ii<bs; ii++,value+=stepval) { 658 for (jj=0; jj<bs; jj++) { 659 *bap++ += *value++; 660 } 661 } 662 } else { 663 for (ii=0; ii<bs; ii++,value+=stepval) { 664 for (jj=0; jj<bs; jj++) { 665 *bap++ = *value++; 666 } 667 } 668 } 669 } 670 goto noinsert2; 671 } 672 } 673 if (nonew == 1) goto noinsert2; 674 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 675 if (nrow >= rmax) { 676 /* there is no extra room in row, therefore enlarge */ 677 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 678 MatScalar *new_a; 679 680 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 681 682 /* malloc new storage space */ 683 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 684 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 685 new_j = (int*)(new_a + bs2*new_nz); 686 new_i = new_j + new_nz; 687 688 /* copy over old data into new slots */ 689 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 690 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 691 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 692 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 693 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 694 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 695 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 696 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 697 /* free up old matrix storage */ 698 ierr = PetscFree(a->a);CHKERRQ(ierr); 699 if (!a->singlemalloc) { 700 ierr = PetscFree(a->i);CHKERRQ(ierr); 701 ierr = PetscFree(a->j);CHKERRQ(ierr); 702 } 703 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 704 a->singlemalloc = PETSC_TRUE; 705 706 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 707 rmax = imax[row] = imax[row] + CHUNKSIZE; 708 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 709 a->s_maxnz += bs2*CHUNKSIZE; 710 a->reallocs++; 711 a->s_nz++; 712 } 713 N = nrow++ - 1; 714 /* shift up all the later entries in this row */ 715 for (ii=N; ii>=i; ii--) { 716 rp[ii+1] = rp[ii]; 717 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 718 } 719 if (N >= i) { 720 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 721 } 722 rp[i] = col; 723 bap = ap + bs2*i; 724 if (roworiented) { 725 for (ii=0; ii<bs; ii++,value+=stepval) { 726 for (jj=ii; jj<bs2; jj+=bs) { 727 bap[jj] = *value++; 728 } 729 } 730 } else { 731 for (ii=0; ii<bs; ii++,value+=stepval) { 732 for (jj=0; jj<bs; jj++) { 733 *bap++ = *value++; 734 } 735 } 736 } 737 noinsert2:; 738 low = i; 739 } 740 ailen[row] = nrow; 741 } 742 PetscFunctionReturn(0); 743 } 744 745 #undef __FUNCT__ 746 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 747 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 748 { 749 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 750 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 751 int m = A->m,*ip,N,*ailen = a->ilen; 752 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 753 MatScalar *aa = a->a,*ap; 754 #if defined(PETSC_HAVE_SPOOLES) 755 PetscTruth flag; 756 #endif 757 758 PetscFunctionBegin; 759 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 760 761 if (m) rmax = ailen[0]; 762 for (i=1; i<mbs; i++) { 763 /* move each row back by the amount of empty slots (fshift) before it*/ 764 fshift += imax[i-1] - ailen[i-1]; 765 rmax = PetscMax(rmax,ailen[i]); 766 if (fshift) { 767 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 768 N = ailen[i]; 769 for (j=0; j<N; j++) { 770 ip[j-fshift] = ip[j]; 771 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 772 } 773 } 774 ai[i] = ai[i-1] + ailen[i-1]; 775 } 776 if (mbs) { 777 fshift += imax[mbs-1] - ailen[mbs-1]; 778 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 779 } 780 /* reset ilen and imax for each row */ 781 for (i=0; i<mbs; i++) { 782 ailen[i] = imax[i] = ai[i+1] - ai[i]; 783 } 784 a->s_nz = ai[mbs]; 785 786 /* diagonals may have moved, reset it */ 787 if (a->diag) { 788 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr); 789 } 790 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 791 m,A->m,a->bs,fshift*bs2,a->s_nz*bs2); 792 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n", 793 a->reallocs); 794 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 795 a->reallocs = 0; 796 A->info.nz_unneeded = (PetscReal)fshift*bs2; 797 798 #if defined(PETSC_HAVE_SPOOLES) 799 ierr = PetscOptionsHasName(A->prefix,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr); 800 if (flag) { ierr = MatUseSpooles_SeqSBAIJ(A);CHKERRQ(ierr); } 801 #endif 802 803 PetscFunctionReturn(0); 804 } 805 806 /* 807 This function returns an array of flags which indicate the locations of contiguous 808 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 809 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 810 Assume: sizes should be long enough to hold all the values. 811 */ 812 #undef __FUNCT__ 813 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 814 int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 815 { 816 int i,j,k,row; 817 PetscTruth flg; 818 819 PetscFunctionBegin; 820 for (i=0,j=0; i<n; j++) { 821 row = idx[i]; 822 if (row%bs!=0) { /* Not the begining of a block */ 823 sizes[j] = 1; 824 i++; 825 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 826 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 827 i++; 828 } else { /* Begining of the block, so check if the complete block exists */ 829 flg = PETSC_TRUE; 830 for (k=1; k<bs; k++) { 831 if (row+k != idx[i+k]) { /* break in the block */ 832 flg = PETSC_FALSE; 833 break; 834 } 835 } 836 if (flg == PETSC_TRUE) { /* No break in the bs */ 837 sizes[j] = bs; 838 i+= bs; 839 } else { 840 sizes[j] = 1; 841 i++; 842 } 843 } 844 } 845 *bs_max = j; 846 PetscFunctionReturn(0); 847 } 848 849 #undef __FUNCT__ 850 #define __FUNCT__ "MatZeroRows_SeqSBAIJ" 851 int MatZeroRows_SeqSBAIJ(Mat A,IS is,PetscScalar *diag) 852 { 853 PetscFunctionBegin; 854 SETERRQ(PETSC_ERR_SUP,"No support for this function yet"); 855 } 856 857 /* Only add/insert a(i,j) with i<=j (blocks). 858 Any a(i,j) with i>j input by user is ingored. 859 */ 860 861 #undef __FUNCT__ 862 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 863 int MatSetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 864 { 865 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 866 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 867 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 868 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 869 int ridx,cidx,bs2=a->bs2,ierr; 870 MatScalar *ap,value,*aa=a->a,*bap; 871 872 PetscFunctionBegin; 873 874 for (k=0; k<m; k++) { /* loop over added rows */ 875 row = im[k]; /* row number */ 876 brow = row/bs; /* block row number */ 877 if (row < 0) continue; 878 #if defined(PETSC_USE_BOPT_g) 879 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 880 #endif 881 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 882 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 883 rmax = imax[brow]; /* maximum space allocated for this row */ 884 nrow = ailen[brow]; /* actual length of this row */ 885 low = 0; 886 887 for (l=0; l<n; l++) { /* loop over added columns */ 888 if (in[l] < 0) continue; 889 #if defined(PETSC_USE_BOPT_g) 890 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m); 891 #endif 892 col = in[l]; 893 bcol = col/bs; /* block col number */ 894 895 if (brow <= bcol){ 896 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 897 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 898 /* element value a(k,l) */ 899 if (roworiented) { 900 value = v[l + k*n]; 901 } else { 902 value = v[k + l*m]; 903 } 904 905 /* move pointer bap to a(k,l) quickly and add/insert value */ 906 if (!sorted) low = 0; high = nrow; 907 while (high-low > 7) { 908 t = (low+high)/2; 909 if (rp[t] > bcol) high = t; 910 else low = t; 911 } 912 for (i=low; i<high; i++) { 913 /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */ 914 if (rp[i] > bcol) break; 915 if (rp[i] == bcol) { 916 bap = ap + bs2*i + bs*cidx + ridx; 917 if (is == ADD_VALUES) *bap += value; 918 else *bap = value; 919 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 920 if (brow == bcol && ridx < cidx){ 921 bap = ap + bs2*i + bs*ridx + cidx; 922 if (is == ADD_VALUES) *bap += value; 923 else *bap = value; 924 } 925 goto noinsert1; 926 } 927 } 928 929 if (nonew == 1) goto noinsert1; 930 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 931 if (nrow >= rmax) { 932 /* there is no extra room in row, therefore enlarge */ 933 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 934 MatScalar *new_a; 935 936 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 937 938 /* Malloc new storage space */ 939 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 940 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 941 new_j = (int*)(new_a + bs2*new_nz); 942 new_i = new_j + new_nz; 943 944 /* copy over old data into new slots */ 945 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 946 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 947 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 948 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 949 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 950 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 951 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 952 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 953 /* free up old matrix storage */ 954 ierr = PetscFree(a->a);CHKERRQ(ierr); 955 if (!a->singlemalloc) { 956 ierr = PetscFree(a->i);CHKERRQ(ierr); 957 ierr = PetscFree(a->j);CHKERRQ(ierr); 958 } 959 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 960 a->singlemalloc = PETSC_TRUE; 961 962 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 963 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 964 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 965 a->s_maxnz += bs2*CHUNKSIZE; 966 a->reallocs++; 967 a->s_nz++; 968 } 969 970 N = nrow++ - 1; 971 /* shift up all the later entries in this row */ 972 for (ii=N; ii>=i; ii--) { 973 rp[ii+1] = rp[ii]; 974 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 975 } 976 if (N>=i) { 977 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 978 } 979 rp[i] = bcol; 980 ap[bs2*i + bs*cidx + ridx] = value; 981 noinsert1:; 982 low = i; 983 /* } */ 984 } 985 } /* end of if .. if.. */ 986 } /* end of loop over added columns */ 987 ailen[brow] = nrow; 988 } /* end of loop over added rows */ 989 990 PetscFunctionReturn(0); 991 } 992 993 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,PetscReal,Mat*); 994 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,PetscReal); 995 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS*,int); 996 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 997 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**); 998 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec); 999 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec); 1000 extern int MatScale_SeqSBAIJ(PetscScalar*,Mat); 1001 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 1002 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 1003 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec); 1004 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 1005 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 1006 extern int MatZeroEntries_SeqSBAIJ(Mat); 1007 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec); 1008 1009 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 1010 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 1011 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 1012 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 1013 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 1014 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 1015 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 1016 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 1017 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec); 1018 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec); 1019 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec); 1020 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec); 1021 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec); 1022 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec); 1023 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec); 1024 1025 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 1026 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 1027 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 1028 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 1029 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 1030 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 1031 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 1032 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 1033 1034 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*); 1035 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*); 1036 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*); 1037 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*); 1038 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*); 1039 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*); 1040 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*); 1041 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*); 1042 extern int MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*); 1043 1044 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 1045 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 1046 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 1047 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 1048 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 1049 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 1050 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 1051 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 1052 1053 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 1054 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 1055 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 1056 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 1057 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 1058 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 1059 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 1060 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 1061 1062 #ifdef HAVE_MatICCFactor 1063 /* modefied from MatILUFactor_SeqSBAIJ, needs further work! */ 1064 #undef __FUNCT__ 1065 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 1066 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,PetscReal fill,int level) 1067 { 1068 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 1069 Mat outA; 1070 int ierr; 1071 PetscTruth row_identity,col_identity; 1072 1073 PetscFunctionBegin; 1074 /* 1075 if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1076 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1077 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1078 if (!row_identity || !col_identity) { 1079 SETERRQ(1,"Row and column permutations must be identity for in-place ICC"); 1080 } 1081 */ 1082 1083 outA = inA; 1084 inA->factor = FACTOR_CHOLESKY; 1085 1086 if (!a->diag) { 1087 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 1088 } 1089 /* 1090 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1091 for ILU(0) factorization with natural ordering 1092 */ 1093 switch (a->bs) { 1094 case 1: 1095 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1096 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1097 PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1098 case 2: 1099 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1100 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1101 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1102 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1103 break; 1104 case 3: 1105 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1106 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1107 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1108 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1109 break; 1110 case 4: 1111 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1112 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1113 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1114 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1115 break; 1116 case 5: 1117 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1118 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1119 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1120 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1121 break; 1122 case 6: 1123 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1124 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1125 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1126 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1127 break; 1128 case 7: 1129 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1130 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1131 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1132 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1133 break; 1134 default: 1135 a->row = row; 1136 a->icol = col; 1137 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1138 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1139 1140 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1141 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1142 PetscLogObjectParent(inA,a->icol); 1143 1144 if (!a->solve_work) { 1145 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1146 PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar)); 1147 } 1148 } 1149 1150 ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr); 1151 1152 PetscFunctionReturn(0); 1153 } 1154 #endif 1155 1156 #undef __FUNCT__ 1157 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1158 int MatPrintHelp_SeqSBAIJ(Mat A) 1159 { 1160 static PetscTruth called = PETSC_FALSE; 1161 MPI_Comm comm = A->comm; 1162 int ierr; 1163 1164 PetscFunctionBegin; 1165 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1166 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1167 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1168 PetscFunctionReturn(0); 1169 } 1170 1171 EXTERN_C_BEGIN 1172 #undef __FUNCT__ 1173 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1174 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices) 1175 { 1176 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1177 int i,nz,n; 1178 1179 PetscFunctionBegin; 1180 nz = baij->s_maxnz; 1181 n = mat->n; 1182 for (i=0; i<nz; i++) { 1183 baij->j[i] = indices[i]; 1184 } 1185 baij->s_nz = nz; 1186 for (i=0; i<n; i++) { 1187 baij->ilen[i] = baij->imax[i]; 1188 } 1189 1190 PetscFunctionReturn(0); 1191 } 1192 EXTERN_C_END 1193 1194 #undef __FUNCT__ 1195 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1196 /*@ 1197 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1198 in the matrix. 1199 1200 Input Parameters: 1201 + mat - the SeqSBAIJ matrix 1202 - indices - the column indices 1203 1204 Level: advanced 1205 1206 Notes: 1207 This can be called if you have precomputed the nonzero structure of the 1208 matrix and want to provide it to the matrix object to improve the performance 1209 of the MatSetValues() operation. 1210 1211 You MUST have set the correct numbers of nonzeros per row in the call to 1212 MatCreateSeqSBAIJ(). 1213 1214 MUST be called before any calls to MatSetValues() 1215 1216 .seealso: MatCreateSeqSBAIJ 1217 @*/ 1218 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices) 1219 { 1220 int ierr,(*f)(Mat,int *); 1221 1222 PetscFunctionBegin; 1223 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1224 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1225 if (f) { 1226 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1227 } else { 1228 SETERRQ(1,"Wrong type of matrix to set column indices"); 1229 } 1230 PetscFunctionReturn(0); 1231 } 1232 1233 #undef __FUNCT__ 1234 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1235 int MatSetUpPreallocation_SeqSBAIJ(Mat A) 1236 { 1237 int ierr; 1238 1239 PetscFunctionBegin; 1240 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1241 PetscFunctionReturn(0); 1242 } 1243 1244 #undef __FUNCT__ 1245 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1246 int MatGetArray_SeqSBAIJ(Mat A,PetscScalar **array) 1247 { 1248 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1249 PetscFunctionBegin; 1250 *array = a->a; 1251 PetscFunctionReturn(0); 1252 } 1253 1254 #undef __FUNCT__ 1255 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1256 int MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar **array) 1257 { 1258 PetscFunctionBegin; 1259 PetscFunctionReturn(0); 1260 } 1261 1262 #include "petscblaslapack.h" 1263 #undef __FUNCT__ 1264 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1265 int MatAXPY_SeqSBAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str) 1266 { 1267 int ierr,one=1; 1268 Mat_SeqSBAIJ *x = (Mat_SeqSBAIJ *)X->data,*y = (Mat_SeqSBAIJ *)Y->data; 1269 1270 PetscFunctionBegin; 1271 if (str == SAME_NONZERO_PATTERN) { 1272 BLaxpy_(&x->s_nz,a,x->a,&one,y->a,&one); 1273 } else { 1274 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1275 } 1276 PetscFunctionReturn(0); 1277 } 1278 1279 /* -------------------------------------------------------------------*/ 1280 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1281 MatGetRow_SeqSBAIJ, 1282 MatRestoreRow_SeqSBAIJ, 1283 MatMult_SeqSBAIJ_N, 1284 MatMultAdd_SeqSBAIJ_N, 1285 MatMultTranspose_SeqSBAIJ, 1286 MatMultTransposeAdd_SeqSBAIJ, 1287 MatSolve_SeqSBAIJ_N, 1288 0, 1289 0, 1290 0, 1291 0, 1292 MatCholeskyFactor_SeqSBAIJ, 1293 MatRelax_SeqSBAIJ, 1294 MatTranspose_SeqSBAIJ, 1295 MatGetInfo_SeqSBAIJ, 1296 MatEqual_SeqSBAIJ, 1297 MatGetDiagonal_SeqSBAIJ, 1298 MatDiagonalScale_SeqSBAIJ, 1299 MatNorm_SeqSBAIJ, 1300 0, 1301 MatAssemblyEnd_SeqSBAIJ, 1302 0, 1303 MatSetOption_SeqSBAIJ, 1304 MatZeroEntries_SeqSBAIJ, 1305 MatZeroRows_SeqSBAIJ, 1306 0, 1307 0, 1308 MatCholeskyFactorSymbolic_SeqSBAIJ, 1309 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1310 MatSetUpPreallocation_SeqSBAIJ, 1311 0, 1312 MatICCFactorSymbolic_SeqSBAIJ, 1313 MatGetArray_SeqSBAIJ, 1314 MatRestoreArray_SeqSBAIJ, 1315 MatDuplicate_SeqSBAIJ, 1316 0, 1317 0, 1318 0, 1319 0, 1320 MatAXPY_SeqSBAIJ, 1321 MatGetSubMatrices_SeqSBAIJ, 1322 MatIncreaseOverlap_SeqSBAIJ, 1323 MatGetValues_SeqSBAIJ, 1324 0, 1325 MatPrintHelp_SeqSBAIJ, 1326 MatScale_SeqSBAIJ, 1327 0, 1328 0, 1329 0, 1330 MatGetBlockSize_SeqSBAIJ, 1331 MatGetRowIJ_SeqSBAIJ, 1332 MatRestoreRowIJ_SeqSBAIJ, 1333 0, 1334 0, 1335 0, 1336 0, 1337 0, 1338 0, 1339 MatSetValuesBlocked_SeqSBAIJ, 1340 MatGetSubMatrix_SeqSBAIJ, 1341 0, 1342 0, 1343 MatGetPetscMaps_Petsc, 1344 0, 1345 0, 1346 0, 1347 0, 1348 0, 1349 0, 1350 MatGetRowMax_SeqSBAIJ, 1351 0, 1352 0, 1353 0, 1354 0, 1355 0, 1356 0, 1357 0, 1358 0, 1359 0, 1360 0, 1361 0, 1362 0, 1363 0, 1364 #if !defined(PETSC_USE_COMPLEX) 1365 MatGetInertia_SeqSBAIJ 1366 #else 1367 0 1368 #endif 1369 }; 1370 1371 EXTERN_C_BEGIN 1372 #undef __FUNCT__ 1373 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1374 int MatStoreValues_SeqSBAIJ(Mat mat) 1375 { 1376 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1377 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1378 int ierr; 1379 1380 PetscFunctionBegin; 1381 if (aij->nonew != 1) { 1382 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1383 } 1384 1385 /* allocate space for values if not already there */ 1386 if (!aij->saved_values) { 1387 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1388 } 1389 1390 /* copy values over */ 1391 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1392 PetscFunctionReturn(0); 1393 } 1394 EXTERN_C_END 1395 1396 EXTERN_C_BEGIN 1397 #undef __FUNCT__ 1398 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1399 int MatRetrieveValues_SeqSBAIJ(Mat mat) 1400 { 1401 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1402 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1403 1404 PetscFunctionBegin; 1405 if (aij->nonew != 1) { 1406 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1407 } 1408 if (!aij->saved_values) { 1409 SETERRQ(1,"Must call MatStoreValues(A);first"); 1410 } 1411 1412 /* copy values over */ 1413 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1414 PetscFunctionReturn(0); 1415 } 1416 EXTERN_C_END 1417 1418 EXTERN_C_BEGIN 1419 #undef __FUNCT__ 1420 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1421 int MatCreate_SeqSBAIJ(Mat B) 1422 { 1423 Mat_SeqSBAIJ *b; 1424 int ierr,size; 1425 1426 PetscFunctionBegin; 1427 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1428 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1429 B->m = B->M = PetscMax(B->m,B->M); 1430 B->n = B->N = PetscMax(B->n,B->N); 1431 1432 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1433 B->data = (void*)b; 1434 ierr = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1435 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1436 B->ops->destroy = MatDestroy_SeqSBAIJ; 1437 B->ops->view = MatView_SeqSBAIJ; 1438 B->factor = 0; 1439 B->lupivotthreshold = 1.0; 1440 B->mapping = 0; 1441 b->row = 0; 1442 b->icol = 0; 1443 b->reallocs = 0; 1444 b->saved_values = 0; 1445 1446 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1447 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1448 1449 b->sorted = PETSC_FALSE; 1450 b->roworiented = PETSC_TRUE; 1451 b->nonew = 0; 1452 b->diag = 0; 1453 b->solve_work = 0; 1454 b->mult_work = 0; 1455 B->spptr = 0; 1456 b->keepzeroedrows = PETSC_FALSE; 1457 1458 b->inew = 0; 1459 b->jnew = 0; 1460 b->anew = 0; 1461 b->a2anew = 0; 1462 b->permute = PETSC_FALSE; 1463 1464 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1465 "MatStoreValues_SeqSBAIJ", 1466 (void*)MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1467 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1468 "MatRetrieveValues_SeqSBAIJ", 1469 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1470 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1471 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1472 (void*)MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1473 PetscFunctionReturn(0); 1474 } 1475 EXTERN_C_END 1476 1477 #undef __FUNCT__ 1478 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1479 /*@C 1480 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1481 compressed row) format. For good matrix assembly performance the 1482 user should preallocate the matrix storage by setting the parameter nz 1483 (or the array nnz). By setting these parameters accurately, performance 1484 during matrix assembly can be increased by more than a factor of 50. 1485 1486 Collective on Mat 1487 1488 Input Parameters: 1489 + A - the symmetric matrix 1490 . bs - size of block 1491 . nz - number of block nonzeros per block row (same for all rows) 1492 - nnz - array containing the number of block nonzeros in the upper triangular plus 1493 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1494 1495 Options Database Keys: 1496 . -mat_no_unroll - uses code that does not unroll the loops in the 1497 block calculations (much slower) 1498 . -mat_block_size - size of the blocks to use 1499 1500 Level: intermediate 1501 1502 Notes: 1503 Specify the preallocated storage with either nz or nnz (not both). 1504 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1505 allocation. For additional details, see the users manual chapter on 1506 matrices. 1507 1508 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1509 @*/ 1510 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1511 { 1512 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1513 int i,len,ierr,mbs,bs2; 1514 PetscTruth flg; 1515 int s_nz; 1516 1517 PetscFunctionBegin; 1518 B->preallocated = PETSC_TRUE; 1519 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1520 mbs = B->m/bs; 1521 bs2 = bs*bs; 1522 1523 if (mbs*bs != B->m) { 1524 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1525 } 1526 1527 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1528 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1529 if (nnz) { 1530 for (i=0; i<mbs; i++) { 1531 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1532 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs); 1533 } 1534 } 1535 1536 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1537 if (!flg) { 1538 switch (bs) { 1539 case 1: 1540 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1541 B->ops->solve = MatSolve_SeqSBAIJ_1; 1542 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_1; 1543 B->ops->mult = MatMult_SeqSBAIJ_1; 1544 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1545 break; 1546 case 2: 1547 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1548 B->ops->solve = MatSolve_SeqSBAIJ_2; 1549 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_2; 1550 B->ops->mult = MatMult_SeqSBAIJ_2; 1551 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1552 break; 1553 case 3: 1554 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1555 B->ops->solve = MatSolve_SeqSBAIJ_3; 1556 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_3; 1557 B->ops->mult = MatMult_SeqSBAIJ_3; 1558 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1559 break; 1560 case 4: 1561 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1562 B->ops->solve = MatSolve_SeqSBAIJ_4; 1563 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_4; 1564 B->ops->mult = MatMult_SeqSBAIJ_4; 1565 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1566 break; 1567 case 5: 1568 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1569 B->ops->solve = MatSolve_SeqSBAIJ_5; 1570 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_5; 1571 B->ops->mult = MatMult_SeqSBAIJ_5; 1572 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1573 break; 1574 case 6: 1575 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1576 B->ops->solve = MatSolve_SeqSBAIJ_6; 1577 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_6; 1578 B->ops->mult = MatMult_SeqSBAIJ_6; 1579 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1580 break; 1581 case 7: 1582 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1583 B->ops->solve = MatSolve_SeqSBAIJ_7; 1584 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_7; 1585 B->ops->mult = MatMult_SeqSBAIJ_7; 1586 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1587 break; 1588 } 1589 } 1590 1591 b->mbs = mbs; 1592 b->nbs = mbs; 1593 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1594 if (!nnz) { 1595 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1596 else if (nz <= 0) nz = 1; 1597 for (i=0; i<mbs; i++) { 1598 b->imax[i] = nz; 1599 } 1600 nz = nz*mbs; /* total nz */ 1601 } else { 1602 nz = 0; 1603 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1604 } 1605 /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1606 s_nz = nz; 1607 1608 /* allocate the matrix space */ 1609 len = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1610 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1611 ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1612 b->j = (int*)(b->a + s_nz*bs2); 1613 ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr); 1614 b->i = b->j + s_nz; 1615 b->singlemalloc = PETSC_TRUE; 1616 1617 /* pointer to beginning of each row */ 1618 b->i[0] = 0; 1619 for (i=1; i<mbs+1; i++) { 1620 b->i[i] = b->i[i-1] + b->imax[i-1]; 1621 } 1622 1623 /* b->ilen will count nonzeros in each block row so far. */ 1624 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1625 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1626 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1627 1628 b->bs = bs; 1629 b->bs2 = bs2; 1630 b->s_nz = 0; 1631 b->s_maxnz = s_nz*bs2; 1632 1633 b->inew = 0; 1634 b->jnew = 0; 1635 b->anew = 0; 1636 b->a2anew = 0; 1637 b->permute = PETSC_FALSE; 1638 PetscFunctionReturn(0); 1639 } 1640 1641 1642 #undef __FUNCT__ 1643 #define __FUNCT__ "MatCreateSeqSBAIJ" 1644 /*@C 1645 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1646 compressed row) format. For good matrix assembly performance the 1647 user should preallocate the matrix storage by setting the parameter nz 1648 (or the array nnz). By setting these parameters accurately, performance 1649 during matrix assembly can be increased by more than a factor of 50. 1650 1651 Collective on MPI_Comm 1652 1653 Input Parameters: 1654 + comm - MPI communicator, set to PETSC_COMM_SELF 1655 . bs - size of block 1656 . m - number of rows, or number of columns 1657 . nz - number of block nonzeros per block row (same for all rows) 1658 - nnz - array containing the number of block nonzeros in the upper triangular plus 1659 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1660 1661 Output Parameter: 1662 . A - the symmetric matrix 1663 1664 Options Database Keys: 1665 . -mat_no_unroll - uses code that does not unroll the loops in the 1666 block calculations (much slower) 1667 . -mat_block_size - size of the blocks to use 1668 1669 Level: intermediate 1670 1671 Notes: 1672 1673 Specify the preallocated storage with either nz or nnz (not both). 1674 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1675 allocation. For additional details, see the users manual chapter on 1676 matrices. 1677 1678 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1679 @*/ 1680 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1681 { 1682 int ierr; 1683 1684 PetscFunctionBegin; 1685 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1686 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1687 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1688 PetscFunctionReturn(0); 1689 } 1690 1691 #undef __FUNCT__ 1692 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1693 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1694 { 1695 Mat C; 1696 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1697 int i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr; 1698 1699 PetscFunctionBegin; 1700 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1701 1702 *B = 0; 1703 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1704 ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr); 1705 c = (Mat_SeqSBAIJ*)C->data; 1706 1707 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1708 C->preallocated = PETSC_TRUE; 1709 C->factor = A->factor; 1710 c->row = 0; 1711 c->icol = 0; 1712 c->saved_values = 0; 1713 c->keepzeroedrows = a->keepzeroedrows; 1714 C->assembled = PETSC_TRUE; 1715 1716 c->bs = a->bs; 1717 c->bs2 = a->bs2; 1718 c->mbs = a->mbs; 1719 c->nbs = a->nbs; 1720 1721 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1722 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1723 for (i=0; i<mbs; i++) { 1724 c->imax[i] = a->imax[i]; 1725 c->ilen[i] = a->ilen[i]; 1726 } 1727 1728 /* allocate the matrix space */ 1729 c->singlemalloc = PETSC_TRUE; 1730 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1731 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1732 c->j = (int*)(c->a + nz*bs2); 1733 c->i = c->j + nz; 1734 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1735 if (mbs > 0) { 1736 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1737 if (cpvalues == MAT_COPY_VALUES) { 1738 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1739 } else { 1740 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1741 } 1742 } 1743 1744 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1745 c->sorted = a->sorted; 1746 c->roworiented = a->roworiented; 1747 c->nonew = a->nonew; 1748 1749 if (a->diag) { 1750 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1751 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1752 for (i=0; i<mbs; i++) { 1753 c->diag[i] = a->diag[i]; 1754 } 1755 } else c->diag = 0; 1756 c->s_nz = a->s_nz; 1757 c->s_maxnz = a->s_maxnz; 1758 c->solve_work = 0; 1759 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1760 c->mult_work = 0; 1761 *B = C; 1762 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1763 PetscFunctionReturn(0); 1764 } 1765 1766 EXTERN_C_BEGIN 1767 #undef __FUNCT__ 1768 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1769 int MatLoad_SeqSBAIJ(PetscViewer viewer,MatType type,Mat *A) 1770 { 1771 Mat_SeqSBAIJ *a; 1772 Mat B; 1773 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1774 int *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1775 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1776 int *masked,nmask,tmp,bs2,ishift; 1777 PetscScalar *aa; 1778 MPI_Comm comm = ((PetscObject)viewer)->comm; 1779 1780 PetscFunctionBegin; 1781 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1782 bs2 = bs*bs; 1783 1784 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1785 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1786 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1787 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1788 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1789 M = header[1]; N = header[2]; nz = header[3]; 1790 1791 if (header[3] < 0) { 1792 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1793 } 1794 1795 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1796 1797 /* 1798 This code adds extra rows to make sure the number of rows is 1799 divisible by the blocksize 1800 */ 1801 mbs = M/bs; 1802 extra_rows = bs - M + bs*(mbs); 1803 if (extra_rows == bs) extra_rows = 0; 1804 else mbs++; 1805 if (extra_rows) { 1806 PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"); 1807 } 1808 1809 /* read in row lengths */ 1810 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1811 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1812 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1813 1814 /* read in column indices */ 1815 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1816 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1817 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1818 1819 /* loop over row lengths determining block row lengths */ 1820 ierr = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr); 1821 ierr = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1822 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1823 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1824 masked = mask + mbs; 1825 rowcount = 0; nzcount = 0; 1826 for (i=0; i<mbs; i++) { 1827 nmask = 0; 1828 for (j=0; j<bs; j++) { 1829 kmax = rowlengths[rowcount]; 1830 for (k=0; k<kmax; k++) { 1831 tmp = jj[nzcount++]/bs; /* block col. index */ 1832 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1833 } 1834 rowcount++; 1835 } 1836 s_browlengths[i] += nmask; 1837 1838 /* zero out the mask elements we set */ 1839 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1840 } 1841 1842 /* create our matrix */ 1843 ierr = MatCreateSeqSBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,s_browlengths,A);CHKERRQ(ierr); 1844 B = *A; 1845 a = (Mat_SeqSBAIJ*)B->data; 1846 1847 /* set matrix "i" values */ 1848 a->i[0] = 0; 1849 for (i=1; i<= mbs; i++) { 1850 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1851 a->ilen[i-1] = s_browlengths[i-1]; 1852 } 1853 a->s_nz = a->i[mbs]; 1854 1855 /* read in nonzero values */ 1856 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1857 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1858 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1859 1860 /* set "a" and "j" values into matrix */ 1861 nzcount = 0; jcount = 0; 1862 for (i=0; i<mbs; i++) { 1863 nzcountb = nzcount; 1864 nmask = 0; 1865 for (j=0; j<bs; j++) { 1866 kmax = rowlengths[i*bs+j]; 1867 for (k=0; k<kmax; k++) { 1868 tmp = jj[nzcount++]/bs; /* block col. index */ 1869 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1870 } 1871 } 1872 /* sort the masked values */ 1873 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1874 1875 /* set "j" values into matrix */ 1876 maskcount = 1; 1877 for (j=0; j<nmask; j++) { 1878 a->j[jcount++] = masked[j]; 1879 mask[masked[j]] = maskcount++; 1880 } 1881 1882 /* set "a" values into matrix */ 1883 ishift = bs2*a->i[i]; 1884 for (j=0; j<bs; j++) { 1885 kmax = rowlengths[i*bs+j]; 1886 for (k=0; k<kmax; k++) { 1887 tmp = jj[nzcountb]/bs ; /* block col. index */ 1888 if (tmp >= i){ 1889 block = mask[tmp] - 1; 1890 point = jj[nzcountb] - bs*tmp; 1891 idx = ishift + bs2*block + j + bs*point; 1892 a->a[idx] = aa[nzcountb]; 1893 } 1894 nzcountb++; 1895 } 1896 } 1897 /* zero out the mask elements we set */ 1898 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1899 } 1900 if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1901 1902 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1903 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1904 ierr = PetscFree(aa);CHKERRQ(ierr); 1905 ierr = PetscFree(jj);CHKERRQ(ierr); 1906 ierr = PetscFree(mask);CHKERRQ(ierr); 1907 1908 B->assembled = PETSC_TRUE; 1909 ierr = MatView_Private(B);CHKERRQ(ierr); 1910 PetscFunctionReturn(0); 1911 } 1912 EXTERN_C_END 1913 1914 #undef __FUNCT__ 1915 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1916 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1917 { 1918 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1919 MatScalar *aa=a->a,*v,*v1; 1920 PetscScalar *x,*b,*t,sum,d; 1921 int m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr; 1922 int nz,nz1,*vj,*vj1,i; 1923 1924 PetscFunctionBegin; 1925 its = its*lits; 1926 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1927 1928 if (bs > 1) 1929 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1930 1931 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1932 if (xx != bb) { 1933 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1934 } else { 1935 b = x; 1936 } 1937 1938 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1939 1940 if (flag & SOR_ZERO_INITIAL_GUESS) { 1941 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1942 for (i=0; i<m; i++) 1943 t[i] = b[i]; 1944 1945 for (i=0; i<m; i++){ 1946 d = *(aa + ai[i]); /* diag[i] */ 1947 v = aa + ai[i] + 1; 1948 vj = aj + ai[i] + 1; 1949 nz = ai[i+1] - ai[i] - 1; 1950 x[i] = omega*t[i]/d; 1951 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 1952 PetscLogFlops(2*nz-1); 1953 } 1954 } 1955 1956 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1957 for (i=0; i<m; i++) 1958 t[i] = b[i]; 1959 1960 for (i=0; i<m-1; i++){ /* update rhs */ 1961 v = aa + ai[i] + 1; 1962 vj = aj + ai[i] + 1; 1963 nz = ai[i+1] - ai[i] - 1; 1964 while (nz--) t[*vj++] -= x[i]*(*v++); 1965 PetscLogFlops(2*nz-1); 1966 } 1967 for (i=m-1; i>=0; i--){ 1968 d = *(aa + ai[i]); 1969 v = aa + ai[i] + 1; 1970 vj = aj + ai[i] + 1; 1971 nz = ai[i+1] - ai[i] - 1; 1972 sum = t[i]; 1973 while (nz--) sum -= x[*vj++]*(*v++); 1974 PetscLogFlops(2*nz-1); 1975 x[i] = (1-omega)*x[i] + omega*sum/d; 1976 } 1977 } 1978 its--; 1979 } 1980 1981 while (its--) { 1982 /* 1983 forward sweep: 1984 for i=0,...,m-1: 1985 sum[i] = (b[i] - U(i,:)x )/d[i]; 1986 x[i] = (1-omega)x[i] + omega*sum[i]; 1987 b = b - x[i]*U^T(i,:); 1988 1989 */ 1990 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1991 for (i=0; i<m; i++) 1992 t[i] = b[i]; 1993 1994 for (i=0; i<m; i++){ 1995 d = *(aa + ai[i]); /* diag[i] */ 1996 v = aa + ai[i] + 1; v1=v; 1997 vj = aj + ai[i] + 1; vj1=vj; 1998 nz = ai[i+1] - ai[i] - 1; nz1=nz; 1999 sum = t[i]; 2000 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2001 x[i] = (1-omega)*x[i] + omega*sum/d; 2002 while (nz--) t[*vj++] -= x[i]*(*v++); 2003 PetscLogFlops(4*nz-2); 2004 } 2005 } 2006 2007 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2008 /* 2009 backward sweep: 2010 b = b - x[i]*U^T(i,:), i=0,...,n-2 2011 for i=m-1,...,0: 2012 sum[i] = (b[i] - U(i,:)x )/d[i]; 2013 x[i] = (1-omega)x[i] + omega*sum[i]; 2014 */ 2015 for (i=0; i<m; i++) 2016 t[i] = b[i]; 2017 2018 for (i=0; i<m-1; i++){ /* update rhs */ 2019 v = aa + ai[i] + 1; 2020 vj = aj + ai[i] + 1; 2021 nz = ai[i+1] - ai[i] - 1; 2022 while (nz--) t[*vj++] -= x[i]*(*v++); 2023 PetscLogFlops(2*nz-1); 2024 } 2025 for (i=m-1; i>=0; i--){ 2026 d = *(aa + ai[i]); 2027 v = aa + ai[i] + 1; 2028 vj = aj + ai[i] + 1; 2029 nz = ai[i+1] - ai[i] - 1; 2030 sum = t[i]; 2031 while (nz--) sum -= x[*vj++]*(*v++); 2032 PetscLogFlops(2*nz-1); 2033 x[i] = (1-omega)*x[i] + omega*sum/d; 2034 } 2035 } 2036 } 2037 2038 ierr = PetscFree(t); CHKERRQ(ierr); 2039 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2040 if (bb != xx) { 2041 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2042 } 2043 2044 PetscFunctionReturn(0); 2045 } 2046 2047 2048 2049 2050 2051 2052