1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/ 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "src/mat/impls/sbaij/seq/sbaij.h" 11 12 #define CHUNKSIZE 10 13 14 /* 15 Checks for missing diagonals 16 */ 17 #undef __FUNCT__ 18 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 19 int MatMissingDiagonal_SeqSBAIJ(Mat A) 20 { 21 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 22 int *diag,*jj = a->j,i,ierr; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for (i=0; i<a->mbs; i++) { 28 if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i); 29 } 30 PetscFunctionReturn(0); 31 } 32 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 35 int MatMarkDiagonal_SeqSBAIJ(Mat A) 36 { 37 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 38 int i,mbs = a->mbs,ierr; 39 40 PetscFunctionBegin; 41 if (a->diag) PetscFunctionReturn(0); 42 43 ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr); 44 PetscLogObjectMemory(A,(mbs+1)*sizeof(int)); 45 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 #undef __FUNCT__ 50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 51 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 52 { 53 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 54 int n = a->mbs,i; 55 56 PetscFunctionBegin; 57 *nn = n; 58 if (!ia) PetscFunctionReturn(0); 59 60 if (oshift == 1) { 61 /* temporarily add 1 to i and j indices */ 62 int s_nz = a->i[n]; 63 for (i=0; i<s_nz; i++) a->j[i]++; 64 for (i=0; i<n+1; i++) a->i[i]++; 65 *ia = a->i; *ja = a->j; 66 } else { 67 *ia = a->i; *ja = a->j; 68 } 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 74 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 75 { 76 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 77 int i,n = a->mbs; 78 79 PetscFunctionBegin; 80 if (!ia) PetscFunctionReturn(0); 81 82 if (oshift == 1) { 83 int s_nz = a->i[n]-1; 84 for (i=0; i<s_nz; i++) a->j[i]--; 85 for (i=0; i<n+1; i++) a->i[i]--; 86 } 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ" 92 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs) 93 { 94 Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data; 95 96 PetscFunctionBegin; 97 *bs = sbaij->bs; 98 PetscFunctionReturn(0); 99 } 100 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 103 int MatDestroy_SeqSBAIJ(Mat A) 104 { 105 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 106 int ierr; 107 108 PetscFunctionBegin; 109 #if defined(PETSC_USE_LOG) 110 PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz); 111 #endif 112 ierr = PetscFree(a->a);CHKERRQ(ierr); 113 if (!a->singlemalloc) { 114 ierr = PetscFree(a->i);CHKERRQ(ierr); 115 ierr = PetscFree(a->j);CHKERRQ(ierr); 116 } 117 if (a->row) { 118 ierr = ISDestroy(a->row);CHKERRQ(ierr); 119 } 120 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 121 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 122 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 123 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 124 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 125 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 126 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 127 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 128 129 if (a->inew){ 130 ierr = PetscFree(a->inew);CHKERRQ(ierr); 131 a->inew = 0; 132 } 133 ierr = PetscFree(a);CHKERRQ(ierr); 134 PetscFunctionReturn(0); 135 } 136 137 #undef __FUNCT__ 138 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 139 int MatSetOption_SeqSBAIJ(Mat A,MatOption op) 140 { 141 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 142 143 PetscFunctionBegin; 144 switch (op) { 145 case MAT_ROW_ORIENTED: 146 a->roworiented = PETSC_TRUE; 147 break; 148 case MAT_COLUMN_ORIENTED: 149 a->roworiented = PETSC_FALSE; 150 break; 151 case MAT_COLUMNS_SORTED: 152 a->sorted = PETSC_TRUE; 153 break; 154 case MAT_COLUMNS_UNSORTED: 155 a->sorted = PETSC_FALSE; 156 break; 157 case MAT_KEEP_ZEROED_ROWS: 158 a->keepzeroedrows = PETSC_TRUE; 159 break; 160 case MAT_NO_NEW_NONZERO_LOCATIONS: 161 a->nonew = 1; 162 break; 163 case MAT_NEW_NONZERO_LOCATION_ERR: 164 a->nonew = -1; 165 break; 166 case MAT_NEW_NONZERO_ALLOCATION_ERR: 167 a->nonew = -2; 168 break; 169 case MAT_YES_NEW_NONZERO_LOCATIONS: 170 a->nonew = 0; 171 break; 172 case MAT_ROWS_SORTED: 173 case MAT_ROWS_UNSORTED: 174 case MAT_YES_NEW_DIAGONALS: 175 case MAT_IGNORE_OFF_PROC_ENTRIES: 176 case MAT_USE_HASH_TABLE: 177 PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n"); 178 break; 179 case MAT_NO_NEW_DIAGONALS: 180 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 181 default: 182 SETERRQ(PETSC_ERR_SUP,"unknown option"); 183 } 184 PetscFunctionReturn(0); 185 } 186 187 #undef __FUNCT__ 188 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 189 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v) 190 { 191 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 192 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr; 193 MatScalar *aa,*aa_i; 194 PetscScalar *v_i; 195 196 PetscFunctionBegin; 197 bs = a->bs; 198 ai = a->i; 199 aj = a->j; 200 aa = a->a; 201 bs2 = a->bs2; 202 203 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 204 205 bn = row/bs; /* Block number */ 206 bp = row % bs; /* Block position */ 207 M = ai[bn+1] - ai[bn]; 208 *ncols = bs*M; 209 210 if (v) { 211 *v = 0; 212 if (*ncols) { 213 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 214 for (i=0; i<M; i++) { /* for each block in the block row */ 215 v_i = *v + i*bs; 216 aa_i = aa + bs2*(ai[bn] + i); 217 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 218 } 219 } 220 } 221 222 if (cols) { 223 *cols = 0; 224 if (*ncols) { 225 ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr); 226 for (i=0; i<M; i++) { /* for each block in the block row */ 227 cols_i = *cols + i*bs; 228 itmp = bs*aj[ai[bn] + i]; 229 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 230 } 231 } 232 } 233 234 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 235 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 236 #ifdef column_search 237 v_i = *v + M*bs; 238 cols_i = *cols + M*bs; 239 for (i=0; i<bn; i++){ /* for each block row */ 240 M = ai[i+1] - ai[i]; 241 for (j=0; j<M; j++){ 242 itmp = aj[ai[i] + j]; /* block column value */ 243 if (itmp == bn){ 244 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 245 for (k=0; k<bs; k++) { 246 *cols_i++ = i*bs+k; 247 *v_i++ = aa_i[k]; 248 } 249 *ncols += bs; 250 break; 251 } 252 } 253 } 254 #endif 255 256 PetscFunctionReturn(0); 257 } 258 259 #undef __FUNCT__ 260 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 261 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 262 { 263 int ierr; 264 265 PetscFunctionBegin; 266 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 267 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 268 PetscFunctionReturn(0); 269 } 270 271 #undef __FUNCT__ 272 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 273 int MatTranspose_SeqSBAIJ(Mat A,Mat *B) 274 { 275 int ierr; 276 PetscFunctionBegin; 277 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 278 PetscFunctionReturn(0); 279 } 280 281 #undef __FUNCT__ 282 #define __FUNCT__ "MatView_SeqSBAIJ_Binary" 283 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer) 284 { 285 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 286 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 287 PetscScalar *aa; 288 FILE *file; 289 290 PetscFunctionBegin; 291 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 292 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 293 col_lens[0] = MAT_FILE_COOKIE; 294 295 col_lens[1] = A->m; 296 col_lens[2] = A->m; 297 col_lens[3] = a->s_nz*bs2; 298 299 /* store lengths of each row and write (including header) to file */ 300 count = 0; 301 for (i=0; i<a->mbs; i++) { 302 for (j=0; j<bs; j++) { 303 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 304 } 305 } 306 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 307 ierr = PetscFree(col_lens);CHKERRQ(ierr); 308 309 /* store column indices (zero start index) */ 310 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 311 count = 0; 312 for (i=0; i<a->mbs; i++) { 313 for (j=0; j<bs; j++) { 314 for (k=a->i[i]; k<a->i[i+1]; k++) { 315 for (l=0; l<bs; l++) { 316 jj[count++] = bs*a->j[k] + l; 317 } 318 } 319 } 320 } 321 ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr); 322 ierr = PetscFree(jj);CHKERRQ(ierr); 323 324 /* store nonzero values */ 325 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 326 count = 0; 327 for (i=0; i<a->mbs; i++) { 328 for (j=0; j<bs; j++) { 329 for (k=a->i[i]; k<a->i[i+1]; k++) { 330 for (l=0; l<bs; l++) { 331 aa[count++] = a->a[bs2*k + l*bs + j]; 332 } 333 } 334 } 335 } 336 ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr); 337 ierr = PetscFree(aa);CHKERRQ(ierr); 338 339 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 340 if (file) { 341 fprintf(file,"-matload_block_size %d\n",a->bs); 342 } 343 PetscFunctionReturn(0); 344 } 345 346 #undef __FUNCT__ 347 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 348 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 349 { 350 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 351 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 352 char *name; 353 PetscViewerFormat format; 354 355 PetscFunctionBegin; 356 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 357 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 358 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 359 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 360 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 361 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 362 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 363 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 364 for (i=0; i<a->mbs; i++) { 365 for (j=0; j<bs; j++) { 366 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 367 for (k=a->i[i]; k<a->i[i+1]; k++) { 368 for (l=0; l<bs; l++) { 369 #if defined(PETSC_USE_COMPLEX) 370 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 371 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 372 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 373 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 374 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 375 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 376 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 377 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 378 } 379 #else 380 if (a->a[bs2*k + l*bs + j] != 0.0) { 381 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 382 } 383 #endif 384 } 385 } 386 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 387 } 388 } 389 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 390 } else { 391 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 392 for (i=0; i<a->mbs; i++) { 393 for (j=0; j<bs; j++) { 394 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 395 for (k=a->i[i]; k<a->i[i+1]; k++) { 396 for (l=0; l<bs; l++) { 397 #if defined(PETSC_USE_COMPLEX) 398 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 399 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 400 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 401 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 402 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 403 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 404 } else { 405 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 406 } 407 #else 408 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 409 #endif 410 } 411 } 412 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 413 } 414 } 415 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 416 } 417 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 418 PetscFunctionReturn(0); 419 } 420 421 #undef __FUNCT__ 422 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 423 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 424 { 425 Mat A = (Mat) Aa; 426 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 427 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 428 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 429 MatScalar *aa; 430 MPI_Comm comm; 431 PetscViewer viewer; 432 433 PetscFunctionBegin; 434 /* 435 This is nasty. If this is called from an originally parallel matrix 436 then all processes call this,but only the first has the matrix so the 437 rest should return immediately. 438 */ 439 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 440 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 441 if (rank) PetscFunctionReturn(0); 442 443 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 444 445 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 446 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 447 448 /* loop over matrix elements drawing boxes */ 449 color = PETSC_DRAW_BLUE; 450 for (i=0,row=0; i<mbs; i++,row+=bs) { 451 for (j=a->i[i]; j<a->i[i+1]; j++) { 452 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 453 x_l = a->j[j]*bs; x_r = x_l + 1.0; 454 aa = a->a + j*bs2; 455 for (k=0; k<bs; k++) { 456 for (l=0; l<bs; l++) { 457 if (PetscRealPart(*aa++) >= 0.) continue; 458 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 459 } 460 } 461 } 462 } 463 color = PETSC_DRAW_CYAN; 464 for (i=0,row=0; i<mbs; i++,row+=bs) { 465 for (j=a->i[i]; j<a->i[i+1]; j++) { 466 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 467 x_l = a->j[j]*bs; x_r = x_l + 1.0; 468 aa = a->a + j*bs2; 469 for (k=0; k<bs; k++) { 470 for (l=0; l<bs; l++) { 471 if (PetscRealPart(*aa++) != 0.) continue; 472 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 473 } 474 } 475 } 476 } 477 478 color = PETSC_DRAW_RED; 479 for (i=0,row=0; i<mbs; i++,row+=bs) { 480 for (j=a->i[i]; j<a->i[i+1]; j++) { 481 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 482 x_l = a->j[j]*bs; x_r = x_l + 1.0; 483 aa = a->a + j*bs2; 484 for (k=0; k<bs; k++) { 485 for (l=0; l<bs; l++) { 486 if (PetscRealPart(*aa++) <= 0.) continue; 487 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 488 } 489 } 490 } 491 } 492 PetscFunctionReturn(0); 493 } 494 495 #undef __FUNCT__ 496 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 497 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 498 { 499 int ierr; 500 PetscReal xl,yl,xr,yr,w,h; 501 PetscDraw draw; 502 PetscTruth isnull; 503 504 PetscFunctionBegin; 505 506 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 507 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 508 509 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 510 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 511 xr += w; yr += h; xl = -w; yl = -h; 512 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 513 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 514 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 515 PetscFunctionReturn(0); 516 } 517 518 #undef __FUNCT__ 519 #define __FUNCT__ "MatView_SeqSBAIJ" 520 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 521 { 522 int ierr; 523 PetscTruth issocket,isascii,isbinary,isdraw; 524 525 PetscFunctionBegin; 526 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 527 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 528 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 529 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 530 if (issocket) { 531 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 532 } else if (isascii){ 533 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 534 } else if (isbinary) { 535 ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr); 536 } else if (isdraw) { 537 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 538 } else { 539 SETERRQ1(1,"Viewer type %s not supported by SeqSBAIJ matrices",((PetscObject)viewer)->type_name); 540 } 541 PetscFunctionReturn(0); 542 } 543 544 545 #undef __FUNCT__ 546 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 547 int MatGetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v) 548 { 549 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 550 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 551 int *ai = a->i,*ailen = a->ilen; 552 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 553 MatScalar *ap,*aa = a->a,zero = 0.0; 554 555 PetscFunctionBegin; 556 for (k=0; k<m; k++) { /* loop over rows */ 557 row = im[k]; brow = row/bs; 558 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 559 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 560 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 561 nrow = ailen[brow]; 562 for (l=0; l<n; l++) { /* loop over columns */ 563 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 564 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 565 col = in[l] ; 566 bcol = col/bs; 567 cidx = col%bs; 568 ridx = row%bs; 569 high = nrow; 570 low = 0; /* assume unsorted */ 571 while (high-low > 5) { 572 t = (low+high)/2; 573 if (rp[t] > bcol) high = t; 574 else low = t; 575 } 576 for (i=low; i<high; i++) { 577 if (rp[i] > bcol) break; 578 if (rp[i] == bcol) { 579 *v++ = ap[bs2*i+bs*cidx+ridx]; 580 goto finished; 581 } 582 } 583 *v++ = zero; 584 finished:; 585 } 586 } 587 PetscFunctionReturn(0); 588 } 589 590 591 #undef __FUNCT__ 592 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 593 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 594 { 595 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 596 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 597 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 598 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 599 PetscTruth roworiented=a->roworiented; 600 MatScalar *value = v,*ap,*aa = a->a,*bap; 601 602 PetscFunctionBegin; 603 if (roworiented) { 604 stepval = (n-1)*bs; 605 } else { 606 stepval = (m-1)*bs; 607 } 608 for (k=0; k<m; k++) { /* loop over added rows */ 609 row = im[k]; 610 if (row < 0) continue; 611 #if defined(PETSC_USE_BOPT_g) 612 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 613 #endif 614 rp = aj + ai[row]; 615 ap = aa + bs2*ai[row]; 616 rmax = imax[row]; 617 nrow = ailen[row]; 618 low = 0; 619 for (l=0; l<n; l++) { /* loop over added columns */ 620 if (in[l] < 0) continue; 621 #if defined(PETSC_USE_BOPT_g) 622 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 623 #endif 624 col = in[l]; 625 if (col < row) continue; /* ignore lower triangular block */ 626 if (roworiented) { 627 value = v + k*(stepval+bs)*bs + l*bs; 628 } else { 629 value = v + l*(stepval+bs)*bs + k*bs; 630 } 631 if (!sorted) low = 0; high = nrow; 632 while (high-low > 7) { 633 t = (low+high)/2; 634 if (rp[t] > col) high = t; 635 else low = t; 636 } 637 for (i=low; i<high; i++) { 638 if (rp[i] > col) break; 639 if (rp[i] == col) { 640 bap = ap + bs2*i; 641 if (roworiented) { 642 if (is == ADD_VALUES) { 643 for (ii=0; ii<bs; ii++,value+=stepval) { 644 for (jj=ii; jj<bs2; jj+=bs) { 645 bap[jj] += *value++; 646 } 647 } 648 } else { 649 for (ii=0; ii<bs; ii++,value+=stepval) { 650 for (jj=ii; jj<bs2; jj+=bs) { 651 bap[jj] = *value++; 652 } 653 } 654 } 655 } else { 656 if (is == ADD_VALUES) { 657 for (ii=0; ii<bs; ii++,value+=stepval) { 658 for (jj=0; jj<bs; jj++) { 659 *bap++ += *value++; 660 } 661 } 662 } else { 663 for (ii=0; ii<bs; ii++,value+=stepval) { 664 for (jj=0; jj<bs; jj++) { 665 *bap++ = *value++; 666 } 667 } 668 } 669 } 670 goto noinsert2; 671 } 672 } 673 if (nonew == 1) goto noinsert2; 674 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 675 if (nrow >= rmax) { 676 /* there is no extra room in row, therefore enlarge */ 677 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 678 MatScalar *new_a; 679 680 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 681 682 /* malloc new storage space */ 683 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 684 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 685 new_j = (int*)(new_a + bs2*new_nz); 686 new_i = new_j + new_nz; 687 688 /* copy over old data into new slots */ 689 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 690 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 691 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 692 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 693 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 694 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 695 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 696 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 697 /* free up old matrix storage */ 698 ierr = PetscFree(a->a);CHKERRQ(ierr); 699 if (!a->singlemalloc) { 700 ierr = PetscFree(a->i);CHKERRQ(ierr); 701 ierr = PetscFree(a->j);CHKERRQ(ierr); 702 } 703 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 704 a->singlemalloc = PETSC_TRUE; 705 706 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 707 rmax = imax[row] = imax[row] + CHUNKSIZE; 708 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 709 a->s_maxnz += bs2*CHUNKSIZE; 710 a->reallocs++; 711 a->s_nz++; 712 } 713 N = nrow++ - 1; 714 /* shift up all the later entries in this row */ 715 for (ii=N; ii>=i; ii--) { 716 rp[ii+1] = rp[ii]; 717 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 718 } 719 if (N >= i) { 720 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 721 } 722 rp[i] = col; 723 bap = ap + bs2*i; 724 if (roworiented) { 725 for (ii=0; ii<bs; ii++,value+=stepval) { 726 for (jj=ii; jj<bs2; jj+=bs) { 727 bap[jj] = *value++; 728 } 729 } 730 } else { 731 for (ii=0; ii<bs; ii++,value+=stepval) { 732 for (jj=0; jj<bs; jj++) { 733 *bap++ = *value++; 734 } 735 } 736 } 737 noinsert2:; 738 low = i; 739 } 740 ailen[row] = nrow; 741 } 742 PetscFunctionReturn(0); 743 } 744 745 #undef __FUNCT__ 746 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 747 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 748 { 749 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 750 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 751 int m = A->m,*ip,N,*ailen = a->ilen; 752 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 753 MatScalar *aa = a->a,*ap; 754 #if defined(PETSC_HAVE_SPOOLES) 755 PetscTruth flag; 756 #endif 757 758 PetscFunctionBegin; 759 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 760 761 if (m) rmax = ailen[0]; 762 for (i=1; i<mbs; i++) { 763 /* move each row back by the amount of empty slots (fshift) before it*/ 764 fshift += imax[i-1] - ailen[i-1]; 765 rmax = PetscMax(rmax,ailen[i]); 766 if (fshift) { 767 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 768 N = ailen[i]; 769 for (j=0; j<N; j++) { 770 ip[j-fshift] = ip[j]; 771 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 772 } 773 } 774 ai[i] = ai[i-1] + ailen[i-1]; 775 } 776 if (mbs) { 777 fshift += imax[mbs-1] - ailen[mbs-1]; 778 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 779 } 780 /* reset ilen and imax for each row */ 781 for (i=0; i<mbs; i++) { 782 ailen[i] = imax[i] = ai[i+1] - ai[i]; 783 } 784 a->s_nz = ai[mbs]; 785 786 /* diagonals may have moved, reset it */ 787 if (a->diag) { 788 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr); 789 } 790 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 791 m,A->m,a->bs,fshift*bs2,a->s_nz*bs2); 792 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n", 793 a->reallocs); 794 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 795 a->reallocs = 0; 796 A->info.nz_unneeded = (PetscReal)fshift*bs2; 797 798 #if defined(PETSC_HAVE_SPOOLES) 799 ierr = PetscOptionsHasName(A->prefix,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr); 800 if (flag) { ierr = MatUseSpooles_SeqSBAIJ(A);CHKERRQ(ierr); } 801 #endif 802 803 PetscFunctionReturn(0); 804 } 805 806 /* 807 This function returns an array of flags which indicate the locations of contiguous 808 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 809 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 810 Assume: sizes should be long enough to hold all the values. 811 */ 812 #undef __FUNCT__ 813 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 814 int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 815 { 816 int i,j,k,row; 817 PetscTruth flg; 818 819 PetscFunctionBegin; 820 for (i=0,j=0; i<n; j++) { 821 row = idx[i]; 822 if (row%bs!=0) { /* Not the begining of a block */ 823 sizes[j] = 1; 824 i++; 825 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 826 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 827 i++; 828 } else { /* Begining of the block, so check if the complete block exists */ 829 flg = PETSC_TRUE; 830 for (k=1; k<bs; k++) { 831 if (row+k != idx[i+k]) { /* break in the block */ 832 flg = PETSC_FALSE; 833 break; 834 } 835 } 836 if (flg == PETSC_TRUE) { /* No break in the bs */ 837 sizes[j] = bs; 838 i+= bs; 839 } else { 840 sizes[j] = 1; 841 i++; 842 } 843 } 844 } 845 *bs_max = j; 846 PetscFunctionReturn(0); 847 } 848 849 #undef __FUNCT__ 850 #define __FUNCT__ "MatZeroRows_SeqSBAIJ" 851 int MatZeroRows_SeqSBAIJ(Mat A,IS is,PetscScalar *diag) 852 { 853 PetscFunctionBegin; 854 SETERRQ(PETSC_ERR_SUP,"No support for this function yet"); 855 } 856 857 /* Only add/insert a(i,j) with i<=j (blocks). 858 Any a(i,j) with i>j input by user is ingored. 859 */ 860 861 #undef __FUNCT__ 862 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 863 int MatSetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 864 { 865 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 866 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 867 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 868 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 869 int ridx,cidx,bs2=a->bs2,ierr; 870 MatScalar *ap,value,*aa=a->a,*bap; 871 872 PetscFunctionBegin; 873 874 for (k=0; k<m; k++) { /* loop over added rows */ 875 row = im[k]; /* row number */ 876 brow = row/bs; /* block row number */ 877 if (row < 0) continue; 878 #if defined(PETSC_USE_BOPT_g) 879 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 880 #endif 881 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 882 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 883 rmax = imax[brow]; /* maximum space allocated for this row */ 884 nrow = ailen[brow]; /* actual length of this row */ 885 low = 0; 886 887 for (l=0; l<n; l++) { /* loop over added columns */ 888 if (in[l] < 0) continue; 889 #if defined(PETSC_USE_BOPT_g) 890 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m); 891 #endif 892 col = in[l]; 893 bcol = col/bs; /* block col number */ 894 895 if (brow <= bcol){ 896 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 897 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 898 /* element value a(k,l) */ 899 if (roworiented) { 900 value = v[l + k*n]; 901 } else { 902 value = v[k + l*m]; 903 } 904 905 /* move pointer bap to a(k,l) quickly and add/insert value */ 906 if (!sorted) low = 0; high = nrow; 907 while (high-low > 7) { 908 t = (low+high)/2; 909 if (rp[t] > bcol) high = t; 910 else low = t; 911 } 912 for (i=low; i<high; i++) { 913 /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */ 914 if (rp[i] > bcol) break; 915 if (rp[i] == bcol) { 916 bap = ap + bs2*i + bs*cidx + ridx; 917 if (is == ADD_VALUES) *bap += value; 918 else *bap = value; 919 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 920 if (brow == bcol && ridx < cidx){ 921 bap = ap + bs2*i + bs*ridx + cidx; 922 if (is == ADD_VALUES) *bap += value; 923 else *bap = value; 924 } 925 goto noinsert1; 926 } 927 } 928 929 if (nonew == 1) goto noinsert1; 930 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 931 if (nrow >= rmax) { 932 /* there is no extra room in row, therefore enlarge */ 933 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 934 MatScalar *new_a; 935 936 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 937 938 /* Malloc new storage space */ 939 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 940 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 941 new_j = (int*)(new_a + bs2*new_nz); 942 new_i = new_j + new_nz; 943 944 /* copy over old data into new slots */ 945 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 946 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 947 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 948 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 949 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 950 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 951 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 952 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 953 /* free up old matrix storage */ 954 ierr = PetscFree(a->a);CHKERRQ(ierr); 955 if (!a->singlemalloc) { 956 ierr = PetscFree(a->i);CHKERRQ(ierr); 957 ierr = PetscFree(a->j);CHKERRQ(ierr); 958 } 959 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 960 a->singlemalloc = PETSC_TRUE; 961 962 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 963 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 964 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 965 a->s_maxnz += bs2*CHUNKSIZE; 966 a->reallocs++; 967 a->s_nz++; 968 } 969 970 N = nrow++ - 1; 971 /* shift up all the later entries in this row */ 972 for (ii=N; ii>=i; ii--) { 973 rp[ii+1] = rp[ii]; 974 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 975 } 976 if (N>=i) { 977 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 978 } 979 rp[i] = bcol; 980 ap[bs2*i + bs*cidx + ridx] = value; 981 noinsert1:; 982 low = i; 983 /* } */ 984 } 985 } /* end of if .. if.. */ 986 } /* end of loop over added columns */ 987 ailen[brow] = nrow; 988 } /* end of loop over added rows */ 989 990 PetscFunctionReturn(0); 991 } 992 993 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*); 994 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*); 995 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS*,int); 996 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 997 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**); 998 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec); 999 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec); 1000 extern int MatScale_SeqSBAIJ(PetscScalar*,Mat); 1001 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 1002 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 1003 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec); 1004 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 1005 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 1006 extern int MatZeroEntries_SeqSBAIJ(Mat); 1007 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec); 1008 1009 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 1010 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 1011 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 1012 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 1013 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 1014 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 1015 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 1016 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 1017 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec); 1018 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec); 1019 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec); 1020 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec); 1021 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec); 1022 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec); 1023 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec); 1024 1025 extern int MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs); 1026 1027 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 1028 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 1029 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 1030 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 1031 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 1032 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 1033 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 1034 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 1035 1036 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*); 1037 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*); 1038 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*); 1039 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*); 1040 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*); 1041 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*); 1042 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*); 1043 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*); 1044 extern int MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*); 1045 1046 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 1047 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 1048 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 1049 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 1050 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 1051 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 1052 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 1053 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 1054 1055 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 1056 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 1057 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 1058 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 1059 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 1060 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 1061 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 1062 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 1063 1064 #ifdef HAVE_MatICCFactor 1065 /* modefied from MatILUFactor_SeqSBAIJ, needs further work! */ 1066 #undef __FUNCT__ 1067 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 1068 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 1069 { 1070 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 1071 Mat outA; 1072 int ierr; 1073 PetscTruth row_identity,col_identity; 1074 1075 PetscFunctionBegin; 1076 /* 1077 if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1078 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1079 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1080 if (!row_identity || !col_identity) { 1081 SETERRQ(1,"Row and column permutations must be identity for in-place ICC"); 1082 } 1083 */ 1084 1085 outA = inA; 1086 inA->factor = FACTOR_CHOLESKY; 1087 1088 if (!a->diag) { 1089 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 1090 } 1091 /* 1092 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1093 for ILU(0) factorization with natural ordering 1094 */ 1095 switch (a->bs) { 1096 case 1: 1097 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1098 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1099 inA->ops->solves = MatSolves_SeqSBAIJ_1; 1100 PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1101 case 2: 1102 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1103 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1104 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1105 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1106 break; 1107 case 3: 1108 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1109 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1110 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1111 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1112 break; 1113 case 4: 1114 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1115 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1116 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1117 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1118 break; 1119 case 5: 1120 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1121 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1122 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1123 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1124 break; 1125 case 6: 1126 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1127 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1128 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1129 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1130 break; 1131 case 7: 1132 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1133 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1134 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1135 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1136 break; 1137 default: 1138 a->row = row; 1139 a->icol = col; 1140 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1141 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1142 1143 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1144 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1145 PetscLogObjectParent(inA,a->icol); 1146 1147 if (!a->solve_work) { 1148 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1149 PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar)); 1150 } 1151 } 1152 1153 ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr); 1154 1155 PetscFunctionReturn(0); 1156 } 1157 #endif 1158 1159 #undef __FUNCT__ 1160 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1161 int MatPrintHelp_SeqSBAIJ(Mat A) 1162 { 1163 static PetscTruth called = PETSC_FALSE; 1164 MPI_Comm comm = A->comm; 1165 int ierr; 1166 1167 PetscFunctionBegin; 1168 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1169 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1170 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1171 PetscFunctionReturn(0); 1172 } 1173 1174 EXTERN_C_BEGIN 1175 #undef __FUNCT__ 1176 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1177 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices) 1178 { 1179 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1180 int i,nz,n; 1181 1182 PetscFunctionBegin; 1183 nz = baij->s_maxnz; 1184 n = mat->n; 1185 for (i=0; i<nz; i++) { 1186 baij->j[i] = indices[i]; 1187 } 1188 baij->s_nz = nz; 1189 for (i=0; i<n; i++) { 1190 baij->ilen[i] = baij->imax[i]; 1191 } 1192 1193 PetscFunctionReturn(0); 1194 } 1195 EXTERN_C_END 1196 1197 #undef __FUNCT__ 1198 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1199 /*@ 1200 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1201 in the matrix. 1202 1203 Input Parameters: 1204 + mat - the SeqSBAIJ matrix 1205 - indices - the column indices 1206 1207 Level: advanced 1208 1209 Notes: 1210 This can be called if you have precomputed the nonzero structure of the 1211 matrix and want to provide it to the matrix object to improve the performance 1212 of the MatSetValues() operation. 1213 1214 You MUST have set the correct numbers of nonzeros per row in the call to 1215 MatCreateSeqSBAIJ(). 1216 1217 MUST be called before any calls to MatSetValues() 1218 1219 .seealso: MatCreateSeqSBAIJ 1220 @*/ 1221 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices) 1222 { 1223 int ierr,(*f)(Mat,int *); 1224 1225 PetscFunctionBegin; 1226 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1227 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1228 if (f) { 1229 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1230 } else { 1231 SETERRQ(1,"Wrong type of matrix to set column indices"); 1232 } 1233 PetscFunctionReturn(0); 1234 } 1235 1236 #undef __FUNCT__ 1237 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1238 int MatSetUpPreallocation_SeqSBAIJ(Mat A) 1239 { 1240 int ierr; 1241 1242 PetscFunctionBegin; 1243 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1244 PetscFunctionReturn(0); 1245 } 1246 1247 #undef __FUNCT__ 1248 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1249 int MatGetArray_SeqSBAIJ(Mat A,PetscScalar **array) 1250 { 1251 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1252 PetscFunctionBegin; 1253 *array = a->a; 1254 PetscFunctionReturn(0); 1255 } 1256 1257 #undef __FUNCT__ 1258 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1259 int MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar **array) 1260 { 1261 PetscFunctionBegin; 1262 PetscFunctionReturn(0); 1263 } 1264 1265 #include "petscblaslapack.h" 1266 #undef __FUNCT__ 1267 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1268 int MatAXPY_SeqSBAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str) 1269 { 1270 int ierr,one=1; 1271 Mat_SeqSBAIJ *x = (Mat_SeqSBAIJ *)X->data,*y = (Mat_SeqSBAIJ *)Y->data; 1272 1273 PetscFunctionBegin; 1274 if (str == SAME_NONZERO_PATTERN) { 1275 BLaxpy_(&x->s_nz,a,x->a,&one,y->a,&one); 1276 } else { 1277 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1278 } 1279 PetscFunctionReturn(0); 1280 } 1281 1282 /* -------------------------------------------------------------------*/ 1283 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1284 MatGetRow_SeqSBAIJ, 1285 MatRestoreRow_SeqSBAIJ, 1286 MatMult_SeqSBAIJ_N, 1287 MatMultAdd_SeqSBAIJ_N, 1288 MatMultTranspose_SeqSBAIJ, 1289 MatMultTransposeAdd_SeqSBAIJ, 1290 MatSolve_SeqSBAIJ_N, 1291 0, 1292 0, 1293 0, 1294 0, 1295 MatCholeskyFactor_SeqSBAIJ, 1296 MatRelax_SeqSBAIJ, 1297 MatTranspose_SeqSBAIJ, 1298 MatGetInfo_SeqSBAIJ, 1299 MatEqual_SeqSBAIJ, 1300 MatGetDiagonal_SeqSBAIJ, 1301 MatDiagonalScale_SeqSBAIJ, 1302 MatNorm_SeqSBAIJ, 1303 0, 1304 MatAssemblyEnd_SeqSBAIJ, 1305 0, 1306 MatSetOption_SeqSBAIJ, 1307 MatZeroEntries_SeqSBAIJ, 1308 MatZeroRows_SeqSBAIJ, 1309 0, 1310 0, 1311 MatCholeskyFactorSymbolic_SeqSBAIJ, 1312 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1313 MatSetUpPreallocation_SeqSBAIJ, 1314 0, 1315 MatICCFactorSymbolic_SeqSBAIJ, 1316 MatGetArray_SeqSBAIJ, 1317 MatRestoreArray_SeqSBAIJ, 1318 MatDuplicate_SeqSBAIJ, 1319 0, 1320 0, 1321 0, 1322 0, 1323 MatAXPY_SeqSBAIJ, 1324 MatGetSubMatrices_SeqSBAIJ, 1325 MatIncreaseOverlap_SeqSBAIJ, 1326 MatGetValues_SeqSBAIJ, 1327 0, 1328 MatPrintHelp_SeqSBAIJ, 1329 MatScale_SeqSBAIJ, 1330 0, 1331 0, 1332 0, 1333 MatGetBlockSize_SeqSBAIJ, 1334 MatGetRowIJ_SeqSBAIJ, 1335 MatRestoreRowIJ_SeqSBAIJ, 1336 0, 1337 0, 1338 0, 1339 0, 1340 0, 1341 0, 1342 MatSetValuesBlocked_SeqSBAIJ, 1343 MatGetSubMatrix_SeqSBAIJ, 1344 0, 1345 0, 1346 MatGetPetscMaps_Petsc, 1347 0, 1348 0, 1349 0, 1350 0, 1351 0, 1352 0, 1353 MatGetRowMax_SeqSBAIJ, 1354 0, 1355 0, 1356 0, 1357 0, 1358 0, 1359 0, 1360 0, 1361 0, 1362 0, 1363 0, 1364 0, 1365 0, 1366 0, 1367 #if !defined(PETSC_USE_COMPLEX) 1368 MatGetInertia_SeqSBAIJ 1369 #else 1370 0 1371 #endif 1372 }; 1373 1374 EXTERN_C_BEGIN 1375 #undef __FUNCT__ 1376 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1377 int MatStoreValues_SeqSBAIJ(Mat mat) 1378 { 1379 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1380 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1381 int ierr; 1382 1383 PetscFunctionBegin; 1384 if (aij->nonew != 1) { 1385 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1386 } 1387 1388 /* allocate space for values if not already there */ 1389 if (!aij->saved_values) { 1390 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1391 } 1392 1393 /* copy values over */ 1394 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1395 PetscFunctionReturn(0); 1396 } 1397 EXTERN_C_END 1398 1399 EXTERN_C_BEGIN 1400 #undef __FUNCT__ 1401 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1402 int MatRetrieveValues_SeqSBAIJ(Mat mat) 1403 { 1404 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1405 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1406 1407 PetscFunctionBegin; 1408 if (aij->nonew != 1) { 1409 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1410 } 1411 if (!aij->saved_values) { 1412 SETERRQ(1,"Must call MatStoreValues(A);first"); 1413 } 1414 1415 /* copy values over */ 1416 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1417 PetscFunctionReturn(0); 1418 } 1419 EXTERN_C_END 1420 1421 EXTERN_C_BEGIN 1422 #undef __FUNCT__ 1423 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1424 int MatCreate_SeqSBAIJ(Mat B) 1425 { 1426 Mat_SeqSBAIJ *b; 1427 int ierr,size; 1428 1429 PetscFunctionBegin; 1430 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1431 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1432 B->m = B->M = PetscMax(B->m,B->M); 1433 B->n = B->N = PetscMax(B->n,B->N); 1434 1435 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1436 B->data = (void*)b; 1437 ierr = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1438 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1439 B->ops->destroy = MatDestroy_SeqSBAIJ; 1440 B->ops->view = MatView_SeqSBAIJ; 1441 B->factor = 0; 1442 B->lupivotthreshold = 1.0; 1443 B->mapping = 0; 1444 b->row = 0; 1445 b->icol = 0; 1446 b->reallocs = 0; 1447 b->saved_values = 0; 1448 1449 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1450 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1451 1452 b->sorted = PETSC_FALSE; 1453 b->roworiented = PETSC_TRUE; 1454 b->nonew = 0; 1455 b->diag = 0; 1456 b->solve_work = 0; 1457 b->mult_work = 0; 1458 B->spptr = 0; 1459 b->keepzeroedrows = PETSC_FALSE; 1460 1461 b->inew = 0; 1462 b->jnew = 0; 1463 b->anew = 0; 1464 b->a2anew = 0; 1465 b->permute = PETSC_FALSE; 1466 1467 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1468 "MatStoreValues_SeqSBAIJ", 1469 (void*)MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1470 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1471 "MatRetrieveValues_SeqSBAIJ", 1472 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1473 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1474 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1475 (void*)MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1476 PetscFunctionReturn(0); 1477 } 1478 EXTERN_C_END 1479 1480 #undef __FUNCT__ 1481 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1482 /*@C 1483 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1484 compressed row) format. For good matrix assembly performance the 1485 user should preallocate the matrix storage by setting the parameter nz 1486 (or the array nnz). By setting these parameters accurately, performance 1487 during matrix assembly can be increased by more than a factor of 50. 1488 1489 Collective on Mat 1490 1491 Input Parameters: 1492 + A - the symmetric matrix 1493 . bs - size of block 1494 . nz - number of block nonzeros per block row (same for all rows) 1495 - nnz - array containing the number of block nonzeros in the upper triangular plus 1496 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1497 1498 Options Database Keys: 1499 . -mat_no_unroll - uses code that does not unroll the loops in the 1500 block calculations (much slower) 1501 . -mat_block_size - size of the blocks to use 1502 1503 Level: intermediate 1504 1505 Notes: 1506 Specify the preallocated storage with either nz or nnz (not both). 1507 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1508 allocation. For additional details, see the users manual chapter on 1509 matrices. 1510 1511 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1512 @*/ 1513 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1514 { 1515 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1516 int i,len,ierr,mbs,bs2; 1517 PetscTruth flg; 1518 int s_nz; 1519 1520 PetscFunctionBegin; 1521 B->preallocated = PETSC_TRUE; 1522 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1523 mbs = B->m/bs; 1524 bs2 = bs*bs; 1525 1526 if (mbs*bs != B->m) { 1527 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1528 } 1529 1530 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1531 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1532 if (nnz) { 1533 for (i=0; i<mbs; i++) { 1534 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1535 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs); 1536 } 1537 } 1538 1539 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1540 if (!flg) { 1541 switch (bs) { 1542 case 1: 1543 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1544 B->ops->solve = MatSolve_SeqSBAIJ_1; 1545 B->ops->solves = MatSolves_SeqSBAIJ_1; 1546 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_1; 1547 B->ops->mult = MatMult_SeqSBAIJ_1; 1548 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1549 break; 1550 case 2: 1551 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1552 B->ops->solve = MatSolve_SeqSBAIJ_2; 1553 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_2; 1554 B->ops->mult = MatMult_SeqSBAIJ_2; 1555 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1556 break; 1557 case 3: 1558 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1559 B->ops->solve = MatSolve_SeqSBAIJ_3; 1560 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_3; 1561 B->ops->mult = MatMult_SeqSBAIJ_3; 1562 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1563 break; 1564 case 4: 1565 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1566 B->ops->solve = MatSolve_SeqSBAIJ_4; 1567 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_4; 1568 B->ops->mult = MatMult_SeqSBAIJ_4; 1569 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1570 break; 1571 case 5: 1572 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1573 B->ops->solve = MatSolve_SeqSBAIJ_5; 1574 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_5; 1575 B->ops->mult = MatMult_SeqSBAIJ_5; 1576 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1577 break; 1578 case 6: 1579 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1580 B->ops->solve = MatSolve_SeqSBAIJ_6; 1581 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_6; 1582 B->ops->mult = MatMult_SeqSBAIJ_6; 1583 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1584 break; 1585 case 7: 1586 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1587 B->ops->solve = MatSolve_SeqSBAIJ_7; 1588 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_7; 1589 B->ops->mult = MatMult_SeqSBAIJ_7; 1590 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1591 break; 1592 } 1593 } 1594 1595 b->mbs = mbs; 1596 b->nbs = mbs; 1597 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1598 if (!nnz) { 1599 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1600 else if (nz <= 0) nz = 1; 1601 for (i=0; i<mbs; i++) { 1602 b->imax[i] = nz; 1603 } 1604 nz = nz*mbs; /* total nz */ 1605 } else { 1606 nz = 0; 1607 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1608 } 1609 /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1610 s_nz = nz; 1611 1612 /* allocate the matrix space */ 1613 len = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1614 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1615 ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1616 b->j = (int*)(b->a + s_nz*bs2); 1617 ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr); 1618 b->i = b->j + s_nz; 1619 b->singlemalloc = PETSC_TRUE; 1620 1621 /* pointer to beginning of each row */ 1622 b->i[0] = 0; 1623 for (i=1; i<mbs+1; i++) { 1624 b->i[i] = b->i[i-1] + b->imax[i-1]; 1625 } 1626 1627 /* b->ilen will count nonzeros in each block row so far. */ 1628 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1629 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1630 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1631 1632 b->bs = bs; 1633 b->bs2 = bs2; 1634 b->s_nz = 0; 1635 b->s_maxnz = s_nz*bs2; 1636 1637 b->inew = 0; 1638 b->jnew = 0; 1639 b->anew = 0; 1640 b->a2anew = 0; 1641 b->permute = PETSC_FALSE; 1642 PetscFunctionReturn(0); 1643 } 1644 1645 1646 #undef __FUNCT__ 1647 #define __FUNCT__ "MatCreateSeqSBAIJ" 1648 /*@C 1649 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1650 compressed row) format. For good matrix assembly performance the 1651 user should preallocate the matrix storage by setting the parameter nz 1652 (or the array nnz). By setting these parameters accurately, performance 1653 during matrix assembly can be increased by more than a factor of 50. 1654 1655 Collective on MPI_Comm 1656 1657 Input Parameters: 1658 + comm - MPI communicator, set to PETSC_COMM_SELF 1659 . bs - size of block 1660 . m - number of rows, or number of columns 1661 . nz - number of block nonzeros per block row (same for all rows) 1662 - nnz - array containing the number of block nonzeros in the upper triangular plus 1663 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1664 1665 Output Parameter: 1666 . A - the symmetric matrix 1667 1668 Options Database Keys: 1669 . -mat_no_unroll - uses code that does not unroll the loops in the 1670 block calculations (much slower) 1671 . -mat_block_size - size of the blocks to use 1672 1673 Level: intermediate 1674 1675 Notes: 1676 1677 Specify the preallocated storage with either nz or nnz (not both). 1678 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1679 allocation. For additional details, see the users manual chapter on 1680 matrices. 1681 1682 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1683 @*/ 1684 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1685 { 1686 int ierr; 1687 1688 PetscFunctionBegin; 1689 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1690 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1691 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1692 PetscFunctionReturn(0); 1693 } 1694 1695 #undef __FUNCT__ 1696 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1697 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1698 { 1699 Mat C; 1700 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1701 int i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr; 1702 1703 PetscFunctionBegin; 1704 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1705 1706 *B = 0; 1707 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1708 ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr); 1709 c = (Mat_SeqSBAIJ*)C->data; 1710 1711 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1712 C->preallocated = PETSC_TRUE; 1713 C->factor = A->factor; 1714 c->row = 0; 1715 c->icol = 0; 1716 c->saved_values = 0; 1717 c->keepzeroedrows = a->keepzeroedrows; 1718 C->assembled = PETSC_TRUE; 1719 1720 c->bs = a->bs; 1721 c->bs2 = a->bs2; 1722 c->mbs = a->mbs; 1723 c->nbs = a->nbs; 1724 1725 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1726 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1727 for (i=0; i<mbs; i++) { 1728 c->imax[i] = a->imax[i]; 1729 c->ilen[i] = a->ilen[i]; 1730 } 1731 1732 /* allocate the matrix space */ 1733 c->singlemalloc = PETSC_TRUE; 1734 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1735 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1736 c->j = (int*)(c->a + nz*bs2); 1737 c->i = c->j + nz; 1738 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1739 if (mbs > 0) { 1740 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1741 if (cpvalues == MAT_COPY_VALUES) { 1742 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1743 } else { 1744 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1745 } 1746 } 1747 1748 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1749 c->sorted = a->sorted; 1750 c->roworiented = a->roworiented; 1751 c->nonew = a->nonew; 1752 1753 if (a->diag) { 1754 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1755 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1756 for (i=0; i<mbs; i++) { 1757 c->diag[i] = a->diag[i]; 1758 } 1759 } else c->diag = 0; 1760 c->s_nz = a->s_nz; 1761 c->s_maxnz = a->s_maxnz; 1762 c->solve_work = 0; 1763 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1764 c->mult_work = 0; 1765 *B = C; 1766 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1767 PetscFunctionReturn(0); 1768 } 1769 1770 EXTERN_C_BEGIN 1771 #undef __FUNCT__ 1772 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1773 int MatLoad_SeqSBAIJ(PetscViewer viewer,MatType type,Mat *A) 1774 { 1775 Mat_SeqSBAIJ *a; 1776 Mat B; 1777 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1778 int *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1779 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1780 int *masked,nmask,tmp,bs2,ishift; 1781 PetscScalar *aa; 1782 MPI_Comm comm = ((PetscObject)viewer)->comm; 1783 #if defined(PETSC_HAVE_SPOOLES) 1784 PetscTruth flag; 1785 #endif 1786 1787 PetscFunctionBegin; 1788 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1789 bs2 = bs*bs; 1790 1791 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1792 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1793 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1794 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1795 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1796 M = header[1]; N = header[2]; nz = header[3]; 1797 1798 if (header[3] < 0) { 1799 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1800 } 1801 1802 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1803 1804 /* 1805 This code adds extra rows to make sure the number of rows is 1806 divisible by the blocksize 1807 */ 1808 mbs = M/bs; 1809 extra_rows = bs - M + bs*(mbs); 1810 if (extra_rows == bs) extra_rows = 0; 1811 else mbs++; 1812 if (extra_rows) { 1813 PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"); 1814 } 1815 1816 /* read in row lengths */ 1817 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1818 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1819 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1820 1821 /* read in column indices */ 1822 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1823 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1824 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1825 1826 /* loop over row lengths determining block row lengths */ 1827 ierr = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr); 1828 ierr = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1829 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1830 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1831 masked = mask + mbs; 1832 rowcount = 0; nzcount = 0; 1833 for (i=0; i<mbs; i++) { 1834 nmask = 0; 1835 for (j=0; j<bs; j++) { 1836 kmax = rowlengths[rowcount]; 1837 for (k=0; k<kmax; k++) { 1838 tmp = jj[nzcount++]/bs; /* block col. index */ 1839 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1840 } 1841 rowcount++; 1842 } 1843 s_browlengths[i] += nmask; 1844 1845 /* zero out the mask elements we set */ 1846 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1847 } 1848 1849 /* create our matrix */ 1850 ierr = MatCreateSeqSBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,s_browlengths,A);CHKERRQ(ierr); 1851 B = *A; 1852 a = (Mat_SeqSBAIJ*)B->data; 1853 1854 /* set matrix "i" values */ 1855 a->i[0] = 0; 1856 for (i=1; i<= mbs; i++) { 1857 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1858 a->ilen[i-1] = s_browlengths[i-1]; 1859 } 1860 a->s_nz = a->i[mbs]; 1861 1862 /* read in nonzero values */ 1863 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1864 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1865 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1866 1867 /* set "a" and "j" values into matrix */ 1868 nzcount = 0; jcount = 0; 1869 for (i=0; i<mbs; i++) { 1870 nzcountb = nzcount; 1871 nmask = 0; 1872 for (j=0; j<bs; j++) { 1873 kmax = rowlengths[i*bs+j]; 1874 for (k=0; k<kmax; k++) { 1875 tmp = jj[nzcount++]/bs; /* block col. index */ 1876 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1877 } 1878 } 1879 /* sort the masked values */ 1880 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1881 1882 /* set "j" values into matrix */ 1883 maskcount = 1; 1884 for (j=0; j<nmask; j++) { 1885 a->j[jcount++] = masked[j]; 1886 mask[masked[j]] = maskcount++; 1887 } 1888 1889 /* set "a" values into matrix */ 1890 ishift = bs2*a->i[i]; 1891 for (j=0; j<bs; j++) { 1892 kmax = rowlengths[i*bs+j]; 1893 for (k=0; k<kmax; k++) { 1894 tmp = jj[nzcountb]/bs ; /* block col. index */ 1895 if (tmp >= i){ 1896 block = mask[tmp] - 1; 1897 point = jj[nzcountb] - bs*tmp; 1898 idx = ishift + bs2*block + j + bs*point; 1899 a->a[idx] = aa[nzcountb]; 1900 } 1901 nzcountb++; 1902 } 1903 } 1904 /* zero out the mask elements we set */ 1905 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1906 } 1907 if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1908 1909 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1910 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1911 ierr = PetscFree(aa);CHKERRQ(ierr); 1912 ierr = PetscFree(jj);CHKERRQ(ierr); 1913 ierr = PetscFree(mask);CHKERRQ(ierr); 1914 1915 B->assembled = PETSC_TRUE; 1916 #if defined(PETSC_HAVE_SPOOLES) 1917 ierr = PetscOptionsHasName(B->prefix,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr); 1918 if (flag) { ierr = MatUseSpooles_SeqSBAIJ(B);CHKERRQ(ierr); } 1919 #endif 1920 ierr = MatView_Private(B);CHKERRQ(ierr); 1921 PetscFunctionReturn(0); 1922 } 1923 EXTERN_C_END 1924 1925 #undef __FUNCT__ 1926 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1927 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1928 { 1929 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1930 MatScalar *aa=a->a,*v,*v1; 1931 PetscScalar *x,*b,*t,sum,d; 1932 int m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr; 1933 int nz,nz1,*vj,*vj1,i; 1934 1935 PetscFunctionBegin; 1936 its = its*lits; 1937 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1938 1939 if (bs > 1) 1940 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1941 1942 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1943 if (xx != bb) { 1944 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1945 } else { 1946 b = x; 1947 } 1948 1949 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1950 1951 if (flag & SOR_ZERO_INITIAL_GUESS) { 1952 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1953 for (i=0; i<m; i++) 1954 t[i] = b[i]; 1955 1956 for (i=0; i<m; i++){ 1957 d = *(aa + ai[i]); /* diag[i] */ 1958 v = aa + ai[i] + 1; 1959 vj = aj + ai[i] + 1; 1960 nz = ai[i+1] - ai[i] - 1; 1961 x[i] = omega*t[i]/d; 1962 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 1963 PetscLogFlops(2*nz-1); 1964 } 1965 } 1966 1967 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1968 for (i=0; i<m; i++) 1969 t[i] = b[i]; 1970 1971 for (i=0; i<m-1; i++){ /* update rhs */ 1972 v = aa + ai[i] + 1; 1973 vj = aj + ai[i] + 1; 1974 nz = ai[i+1] - ai[i] - 1; 1975 while (nz--) t[*vj++] -= x[i]*(*v++); 1976 PetscLogFlops(2*nz-1); 1977 } 1978 for (i=m-1; i>=0; i--){ 1979 d = *(aa + ai[i]); 1980 v = aa + ai[i] + 1; 1981 vj = aj + ai[i] + 1; 1982 nz = ai[i+1] - ai[i] - 1; 1983 sum = t[i]; 1984 while (nz--) sum -= x[*vj++]*(*v++); 1985 PetscLogFlops(2*nz-1); 1986 x[i] = (1-omega)*x[i] + omega*sum/d; 1987 } 1988 } 1989 its--; 1990 } 1991 1992 while (its--) { 1993 /* 1994 forward sweep: 1995 for i=0,...,m-1: 1996 sum[i] = (b[i] - U(i,:)x )/d[i]; 1997 x[i] = (1-omega)x[i] + omega*sum[i]; 1998 b = b - x[i]*U^T(i,:); 1999 2000 */ 2001 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2002 for (i=0; i<m; i++) 2003 t[i] = b[i]; 2004 2005 for (i=0; i<m; i++){ 2006 d = *(aa + ai[i]); /* diag[i] */ 2007 v = aa + ai[i] + 1; v1=v; 2008 vj = aj + ai[i] + 1; vj1=vj; 2009 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2010 sum = t[i]; 2011 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2012 x[i] = (1-omega)*x[i] + omega*sum/d; 2013 while (nz--) t[*vj++] -= x[i]*(*v++); 2014 PetscLogFlops(4*nz-2); 2015 } 2016 } 2017 2018 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2019 /* 2020 backward sweep: 2021 b = b - x[i]*U^T(i,:), i=0,...,n-2 2022 for i=m-1,...,0: 2023 sum[i] = (b[i] - U(i,:)x )/d[i]; 2024 x[i] = (1-omega)x[i] + omega*sum[i]; 2025 */ 2026 for (i=0; i<m; i++) 2027 t[i] = b[i]; 2028 2029 for (i=0; i<m-1; i++){ /* update rhs */ 2030 v = aa + ai[i] + 1; 2031 vj = aj + ai[i] + 1; 2032 nz = ai[i+1] - ai[i] - 1; 2033 while (nz--) t[*vj++] -= x[i]*(*v++); 2034 PetscLogFlops(2*nz-1); 2035 } 2036 for (i=m-1; i>=0; i--){ 2037 d = *(aa + ai[i]); 2038 v = aa + ai[i] + 1; 2039 vj = aj + ai[i] + 1; 2040 nz = ai[i+1] - ai[i] - 1; 2041 sum = t[i]; 2042 while (nz--) sum -= x[*vj++]*(*v++); 2043 PetscLogFlops(2*nz-1); 2044 x[i] = (1-omega)*x[i] + omega*sum/d; 2045 } 2046 } 2047 } 2048 2049 ierr = PetscFree(t); CHKERRQ(ierr); 2050 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2051 if (bb != xx) { 2052 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2053 } 2054 2055 PetscFunctionReturn(0); 2056 } 2057 2058 2059 2060 2061 2062 2063