1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/ 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "src/mat/impls/sbaij/seq/sbaij.h" 11 12 #define CHUNKSIZE 10 13 14 /* 15 Checks for missing diagonals 16 */ 17 #undef __FUNCT__ 18 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 19 int MatMissingDiagonal_SeqSBAIJ(Mat A) 20 { 21 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 22 int *diag,*jj = a->j,i,ierr; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for (i=0; i<a->mbs; i++) { 28 if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i); 29 } 30 PetscFunctionReturn(0); 31 } 32 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 35 int MatMarkDiagonal_SeqSBAIJ(Mat A) 36 { 37 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 38 int i,mbs = a->mbs,ierr; 39 40 PetscFunctionBegin; 41 if (a->diag) PetscFunctionReturn(0); 42 43 ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr); 44 PetscLogObjectMemory(A,(mbs+1)*sizeof(int)); 45 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 #undef __FUNCT__ 50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 51 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 52 { 53 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 54 int n = a->mbs,i; 55 56 PetscFunctionBegin; 57 *nn = n; 58 if (!ia) PetscFunctionReturn(0); 59 60 if (oshift == 1) { 61 /* temporarily add 1 to i and j indices */ 62 int s_nz = a->i[n]; 63 for (i=0; i<s_nz; i++) a->j[i]++; 64 for (i=0; i<n+1; i++) a->i[i]++; 65 *ia = a->i; *ja = a->j; 66 } else { 67 *ia = a->i; *ja = a->j; 68 } 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 74 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 75 { 76 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 77 int i,n = a->mbs; 78 79 PetscFunctionBegin; 80 if (!ia) PetscFunctionReturn(0); 81 82 if (oshift == 1) { 83 int s_nz = a->i[n]-1; 84 for (i=0; i<s_nz; i++) a->j[i]--; 85 for (i=0; i<n+1; i++) a->i[i]--; 86 } 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ" 92 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs) 93 { 94 Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data; 95 96 PetscFunctionBegin; 97 *bs = sbaij->bs; 98 PetscFunctionReturn(0); 99 } 100 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 103 int MatDestroy_SeqSBAIJ(Mat A) 104 { 105 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 106 int ierr; 107 108 PetscFunctionBegin; 109 #if defined(PETSC_USE_LOG) 110 PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz); 111 #endif 112 ierr = PetscFree(a->a);CHKERRQ(ierr); 113 if (!a->singlemalloc) { 114 ierr = PetscFree(a->i);CHKERRQ(ierr); 115 ierr = PetscFree(a->j);CHKERRQ(ierr); 116 } 117 if (a->row) { 118 ierr = ISDestroy(a->row);CHKERRQ(ierr); 119 } 120 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 121 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 122 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 123 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 124 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 125 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 126 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 127 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 128 129 if (a->inew){ 130 ierr = PetscFree(a->inew);CHKERRQ(ierr); 131 a->inew = 0; 132 } 133 ierr = PetscFree(a);CHKERRQ(ierr); 134 PetscFunctionReturn(0); 135 } 136 137 #undef __FUNCT__ 138 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 139 int MatSetOption_SeqSBAIJ(Mat A,MatOption op) 140 { 141 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 142 143 PetscFunctionBegin; 144 switch (op) { 145 case MAT_ROW_ORIENTED: 146 a->roworiented = PETSC_TRUE; 147 break; 148 case MAT_COLUMN_ORIENTED: 149 a->roworiented = PETSC_FALSE; 150 break; 151 case MAT_COLUMNS_SORTED: 152 a->sorted = PETSC_TRUE; 153 break; 154 case MAT_COLUMNS_UNSORTED: 155 a->sorted = PETSC_FALSE; 156 break; 157 case MAT_KEEP_ZEROED_ROWS: 158 a->keepzeroedrows = PETSC_TRUE; 159 break; 160 case MAT_NO_NEW_NONZERO_LOCATIONS: 161 a->nonew = 1; 162 break; 163 case MAT_NEW_NONZERO_LOCATION_ERR: 164 a->nonew = -1; 165 break; 166 case MAT_NEW_NONZERO_ALLOCATION_ERR: 167 a->nonew = -2; 168 break; 169 case MAT_YES_NEW_NONZERO_LOCATIONS: 170 a->nonew = 0; 171 break; 172 case MAT_ROWS_SORTED: 173 case MAT_ROWS_UNSORTED: 174 case MAT_YES_NEW_DIAGONALS: 175 case MAT_IGNORE_OFF_PROC_ENTRIES: 176 case MAT_USE_HASH_TABLE: 177 case MAT_USE_SINGLE_PRECISION_SOLVES: 178 PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n"); 179 break; 180 case MAT_NO_NEW_DIAGONALS: 181 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 182 default: 183 SETERRQ(PETSC_ERR_SUP,"unknown option"); 184 } 185 PetscFunctionReturn(0); 186 } 187 188 #undef __FUNCT__ 189 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 190 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v) 191 { 192 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 193 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr; 194 MatScalar *aa,*aa_i; 195 PetscScalar *v_i; 196 197 PetscFunctionBegin; 198 bs = a->bs; 199 ai = a->i; 200 aj = a->j; 201 aa = a->a; 202 bs2 = a->bs2; 203 204 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 205 206 bn = row/bs; /* Block number */ 207 bp = row % bs; /* Block position */ 208 M = ai[bn+1] - ai[bn]; 209 *ncols = bs*M; 210 211 if (v) { 212 *v = 0; 213 if (*ncols) { 214 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 215 for (i=0; i<M; i++) { /* for each block in the block row */ 216 v_i = *v + i*bs; 217 aa_i = aa + bs2*(ai[bn] + i); 218 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 219 } 220 } 221 } 222 223 if (cols) { 224 *cols = 0; 225 if (*ncols) { 226 ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr); 227 for (i=0; i<M; i++) { /* for each block in the block row */ 228 cols_i = *cols + i*bs; 229 itmp = bs*aj[ai[bn] + i]; 230 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 231 } 232 } 233 } 234 235 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 236 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 237 #ifdef column_search 238 v_i = *v + M*bs; 239 cols_i = *cols + M*bs; 240 for (i=0; i<bn; i++){ /* for each block row */ 241 M = ai[i+1] - ai[i]; 242 for (j=0; j<M; j++){ 243 itmp = aj[ai[i] + j]; /* block column value */ 244 if (itmp == bn){ 245 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 246 for (k=0; k<bs; k++) { 247 *cols_i++ = i*bs+k; 248 *v_i++ = aa_i[k]; 249 } 250 *ncols += bs; 251 break; 252 } 253 } 254 } 255 #endif 256 257 PetscFunctionReturn(0); 258 } 259 260 #undef __FUNCT__ 261 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 262 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 263 { 264 int ierr; 265 266 PetscFunctionBegin; 267 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 268 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 269 PetscFunctionReturn(0); 270 } 271 272 #undef __FUNCT__ 273 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 274 int MatTranspose_SeqSBAIJ(Mat A,Mat *B) 275 { 276 int ierr; 277 PetscFunctionBegin; 278 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 279 PetscFunctionReturn(0); 280 } 281 282 #undef __FUNCT__ 283 #define __FUNCT__ "MatView_SeqSBAIJ_Binary" 284 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer) 285 { 286 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 287 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 288 PetscScalar *aa; 289 FILE *file; 290 291 PetscFunctionBegin; 292 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 293 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 294 col_lens[0] = MAT_FILE_COOKIE; 295 296 col_lens[1] = A->m; 297 col_lens[2] = A->m; 298 col_lens[3] = a->s_nz*bs2; 299 300 /* store lengths of each row and write (including header) to file */ 301 count = 0; 302 for (i=0; i<a->mbs; i++) { 303 for (j=0; j<bs; j++) { 304 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 305 } 306 } 307 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 308 ierr = PetscFree(col_lens);CHKERRQ(ierr); 309 310 /* store column indices (zero start index) */ 311 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 312 count = 0; 313 for (i=0; i<a->mbs; i++) { 314 for (j=0; j<bs; j++) { 315 for (k=a->i[i]; k<a->i[i+1]; k++) { 316 for (l=0; l<bs; l++) { 317 jj[count++] = bs*a->j[k] + l; 318 } 319 } 320 } 321 } 322 ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr); 323 ierr = PetscFree(jj);CHKERRQ(ierr); 324 325 /* store nonzero values */ 326 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 327 count = 0; 328 for (i=0; i<a->mbs; i++) { 329 for (j=0; j<bs; j++) { 330 for (k=a->i[i]; k<a->i[i+1]; k++) { 331 for (l=0; l<bs; l++) { 332 aa[count++] = a->a[bs2*k + l*bs + j]; 333 } 334 } 335 } 336 } 337 ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr); 338 ierr = PetscFree(aa);CHKERRQ(ierr); 339 340 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 341 if (file) { 342 fprintf(file,"-matload_block_size %d\n",a->bs); 343 } 344 PetscFunctionReturn(0); 345 } 346 347 #undef __FUNCT__ 348 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 349 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 350 { 351 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 352 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 353 char *name; 354 PetscViewerFormat format; 355 356 PetscFunctionBegin; 357 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 358 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 359 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 360 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 361 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 362 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 363 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 364 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 365 for (i=0; i<a->mbs; i++) { 366 for (j=0; j<bs; j++) { 367 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 368 for (k=a->i[i]; k<a->i[i+1]; k++) { 369 for (l=0; l<bs; l++) { 370 #if defined(PETSC_USE_COMPLEX) 371 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 372 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 373 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 374 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 375 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 376 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 377 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 378 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 379 } 380 #else 381 if (a->a[bs2*k + l*bs + j] != 0.0) { 382 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 383 } 384 #endif 385 } 386 } 387 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 388 } 389 } 390 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 391 } else { 392 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 393 for (i=0; i<a->mbs; i++) { 394 for (j=0; j<bs; j++) { 395 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 396 for (k=a->i[i]; k<a->i[i+1]; k++) { 397 for (l=0; l<bs; l++) { 398 #if defined(PETSC_USE_COMPLEX) 399 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 400 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 401 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 402 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 403 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 404 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 405 } else { 406 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 407 } 408 #else 409 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 410 #endif 411 } 412 } 413 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 414 } 415 } 416 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 417 } 418 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 419 PetscFunctionReturn(0); 420 } 421 422 #undef __FUNCT__ 423 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 424 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 425 { 426 Mat A = (Mat) Aa; 427 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 428 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 429 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 430 MatScalar *aa; 431 MPI_Comm comm; 432 PetscViewer viewer; 433 434 PetscFunctionBegin; 435 /* 436 This is nasty. If this is called from an originally parallel matrix 437 then all processes call this,but only the first has the matrix so the 438 rest should return immediately. 439 */ 440 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 441 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 442 if (rank) PetscFunctionReturn(0); 443 444 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 445 446 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 447 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 448 449 /* loop over matrix elements drawing boxes */ 450 color = PETSC_DRAW_BLUE; 451 for (i=0,row=0; i<mbs; i++,row+=bs) { 452 for (j=a->i[i]; j<a->i[i+1]; j++) { 453 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 454 x_l = a->j[j]*bs; x_r = x_l + 1.0; 455 aa = a->a + j*bs2; 456 for (k=0; k<bs; k++) { 457 for (l=0; l<bs; l++) { 458 if (PetscRealPart(*aa++) >= 0.) continue; 459 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 460 } 461 } 462 } 463 } 464 color = PETSC_DRAW_CYAN; 465 for (i=0,row=0; i<mbs; i++,row+=bs) { 466 for (j=a->i[i]; j<a->i[i+1]; j++) { 467 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 468 x_l = a->j[j]*bs; x_r = x_l + 1.0; 469 aa = a->a + j*bs2; 470 for (k=0; k<bs; k++) { 471 for (l=0; l<bs; l++) { 472 if (PetscRealPart(*aa++) != 0.) continue; 473 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 474 } 475 } 476 } 477 } 478 479 color = PETSC_DRAW_RED; 480 for (i=0,row=0; i<mbs; i++,row+=bs) { 481 for (j=a->i[i]; j<a->i[i+1]; j++) { 482 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 483 x_l = a->j[j]*bs; x_r = x_l + 1.0; 484 aa = a->a + j*bs2; 485 for (k=0; k<bs; k++) { 486 for (l=0; l<bs; l++) { 487 if (PetscRealPart(*aa++) <= 0.) continue; 488 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 489 } 490 } 491 } 492 } 493 PetscFunctionReturn(0); 494 } 495 496 #undef __FUNCT__ 497 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 498 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 499 { 500 int ierr; 501 PetscReal xl,yl,xr,yr,w,h; 502 PetscDraw draw; 503 PetscTruth isnull; 504 505 PetscFunctionBegin; 506 507 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 508 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 509 510 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 511 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 512 xr += w; yr += h; xl = -w; yl = -h; 513 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 514 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 515 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 516 PetscFunctionReturn(0); 517 } 518 519 #undef __FUNCT__ 520 #define __FUNCT__ "MatView_SeqSBAIJ" 521 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 522 { 523 int ierr; 524 PetscTruth issocket,isascii,isbinary,isdraw; 525 526 PetscFunctionBegin; 527 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 528 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 529 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 530 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 531 if (issocket) { 532 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 533 } else if (isascii){ 534 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 535 } else if (isbinary) { 536 ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr); 537 } else if (isdraw) { 538 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 539 } else { 540 SETERRQ1(1,"Viewer type %s not supported by SeqSBAIJ matrices",((PetscObject)viewer)->type_name); 541 } 542 PetscFunctionReturn(0); 543 } 544 545 546 #undef __FUNCT__ 547 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 548 int MatGetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v) 549 { 550 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 551 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 552 int *ai = a->i,*ailen = a->ilen; 553 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 554 MatScalar *ap,*aa = a->a,zero = 0.0; 555 556 PetscFunctionBegin; 557 for (k=0; k<m; k++) { /* loop over rows */ 558 row = im[k]; brow = row/bs; 559 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 560 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 561 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 562 nrow = ailen[brow]; 563 for (l=0; l<n; l++) { /* loop over columns */ 564 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 565 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 566 col = in[l] ; 567 bcol = col/bs; 568 cidx = col%bs; 569 ridx = row%bs; 570 high = nrow; 571 low = 0; /* assume unsorted */ 572 while (high-low > 5) { 573 t = (low+high)/2; 574 if (rp[t] > bcol) high = t; 575 else low = t; 576 } 577 for (i=low; i<high; i++) { 578 if (rp[i] > bcol) break; 579 if (rp[i] == bcol) { 580 *v++ = ap[bs2*i+bs*cidx+ridx]; 581 goto finished; 582 } 583 } 584 *v++ = zero; 585 finished:; 586 } 587 } 588 PetscFunctionReturn(0); 589 } 590 591 592 #undef __FUNCT__ 593 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 594 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 595 { 596 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 597 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 598 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 599 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 600 PetscTruth roworiented=a->roworiented; 601 MatScalar *value = v,*ap,*aa = a->a,*bap; 602 603 PetscFunctionBegin; 604 if (roworiented) { 605 stepval = (n-1)*bs; 606 } else { 607 stepval = (m-1)*bs; 608 } 609 for (k=0; k<m; k++) { /* loop over added rows */ 610 row = im[k]; 611 if (row < 0) continue; 612 #if defined(PETSC_USE_BOPT_g) 613 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 614 #endif 615 rp = aj + ai[row]; 616 ap = aa + bs2*ai[row]; 617 rmax = imax[row]; 618 nrow = ailen[row]; 619 low = 0; 620 for (l=0; l<n; l++) { /* loop over added columns */ 621 if (in[l] < 0) continue; 622 #if defined(PETSC_USE_BOPT_g) 623 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 624 #endif 625 col = in[l]; 626 if (col < row) continue; /* ignore lower triangular block */ 627 if (roworiented) { 628 value = v + k*(stepval+bs)*bs + l*bs; 629 } else { 630 value = v + l*(stepval+bs)*bs + k*bs; 631 } 632 if (!sorted) low = 0; high = nrow; 633 while (high-low > 7) { 634 t = (low+high)/2; 635 if (rp[t] > col) high = t; 636 else low = t; 637 } 638 for (i=low; i<high; i++) { 639 if (rp[i] > col) break; 640 if (rp[i] == col) { 641 bap = ap + bs2*i; 642 if (roworiented) { 643 if (is == ADD_VALUES) { 644 for (ii=0; ii<bs; ii++,value+=stepval) { 645 for (jj=ii; jj<bs2; jj+=bs) { 646 bap[jj] += *value++; 647 } 648 } 649 } else { 650 for (ii=0; ii<bs; ii++,value+=stepval) { 651 for (jj=ii; jj<bs2; jj+=bs) { 652 bap[jj] = *value++; 653 } 654 } 655 } 656 } else { 657 if (is == ADD_VALUES) { 658 for (ii=0; ii<bs; ii++,value+=stepval) { 659 for (jj=0; jj<bs; jj++) { 660 *bap++ += *value++; 661 } 662 } 663 } else { 664 for (ii=0; ii<bs; ii++,value+=stepval) { 665 for (jj=0; jj<bs; jj++) { 666 *bap++ = *value++; 667 } 668 } 669 } 670 } 671 goto noinsert2; 672 } 673 } 674 if (nonew == 1) goto noinsert2; 675 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 676 if (nrow >= rmax) { 677 /* there is no extra room in row, therefore enlarge */ 678 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 679 MatScalar *new_a; 680 681 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 682 683 /* malloc new storage space */ 684 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 685 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 686 new_j = (int*)(new_a + bs2*new_nz); 687 new_i = new_j + new_nz; 688 689 /* copy over old data into new slots */ 690 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 691 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 692 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 693 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 694 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 695 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 696 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 697 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 698 /* free up old matrix storage */ 699 ierr = PetscFree(a->a);CHKERRQ(ierr); 700 if (!a->singlemalloc) { 701 ierr = PetscFree(a->i);CHKERRQ(ierr); 702 ierr = PetscFree(a->j);CHKERRQ(ierr); 703 } 704 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 705 a->singlemalloc = PETSC_TRUE; 706 707 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 708 rmax = imax[row] = imax[row] + CHUNKSIZE; 709 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 710 a->s_maxnz += bs2*CHUNKSIZE; 711 a->reallocs++; 712 a->s_nz++; 713 } 714 N = nrow++ - 1; 715 /* shift up all the later entries in this row */ 716 for (ii=N; ii>=i; ii--) { 717 rp[ii+1] = rp[ii]; 718 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 719 } 720 if (N >= i) { 721 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 722 } 723 rp[i] = col; 724 bap = ap + bs2*i; 725 if (roworiented) { 726 for (ii=0; ii<bs; ii++,value+=stepval) { 727 for (jj=ii; jj<bs2; jj+=bs) { 728 bap[jj] = *value++; 729 } 730 } 731 } else { 732 for (ii=0; ii<bs; ii++,value+=stepval) { 733 for (jj=0; jj<bs; jj++) { 734 *bap++ = *value++; 735 } 736 } 737 } 738 noinsert2:; 739 low = i; 740 } 741 ailen[row] = nrow; 742 } 743 PetscFunctionReturn(0); 744 } 745 746 #undef __FUNCT__ 747 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 748 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 749 { 750 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 751 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 752 int m = A->m,*ip,N,*ailen = a->ilen; 753 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 754 MatScalar *aa = a->a,*ap; 755 #if defined(PETSC_HAVE_SPOOLES) 756 PetscTruth flag; 757 #endif 758 759 PetscFunctionBegin; 760 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 761 762 if (m) rmax = ailen[0]; 763 for (i=1; i<mbs; i++) { 764 /* move each row back by the amount of empty slots (fshift) before it*/ 765 fshift += imax[i-1] - ailen[i-1]; 766 rmax = PetscMax(rmax,ailen[i]); 767 if (fshift) { 768 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 769 N = ailen[i]; 770 for (j=0; j<N; j++) { 771 ip[j-fshift] = ip[j]; 772 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 773 } 774 } 775 ai[i] = ai[i-1] + ailen[i-1]; 776 } 777 if (mbs) { 778 fshift += imax[mbs-1] - ailen[mbs-1]; 779 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 780 } 781 /* reset ilen and imax for each row */ 782 for (i=0; i<mbs; i++) { 783 ailen[i] = imax[i] = ai[i+1] - ai[i]; 784 } 785 a->s_nz = ai[mbs]; 786 787 /* diagonals may have moved, reset it */ 788 if (a->diag) { 789 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr); 790 } 791 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 792 m,A->m,a->bs,fshift*bs2,a->s_nz*bs2); 793 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n", 794 a->reallocs); 795 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 796 a->reallocs = 0; 797 A->info.nz_unneeded = (PetscReal)fshift*bs2; 798 799 #if defined(PETSC_HAVE_SPOOLES) 800 ierr = PetscOptionsHasName(A->prefix,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr); 801 if (flag) { ierr = MatUseSpooles_SeqSBAIJ(A);CHKERRQ(ierr); } 802 #endif 803 804 PetscFunctionReturn(0); 805 } 806 807 /* 808 This function returns an array of flags which indicate the locations of contiguous 809 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 810 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 811 Assume: sizes should be long enough to hold all the values. 812 */ 813 #undef __FUNCT__ 814 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 815 int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 816 { 817 int i,j,k,row; 818 PetscTruth flg; 819 820 PetscFunctionBegin; 821 for (i=0,j=0; i<n; j++) { 822 row = idx[i]; 823 if (row%bs!=0) { /* Not the begining of a block */ 824 sizes[j] = 1; 825 i++; 826 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 827 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 828 i++; 829 } else { /* Begining of the block, so check if the complete block exists */ 830 flg = PETSC_TRUE; 831 for (k=1; k<bs; k++) { 832 if (row+k != idx[i+k]) { /* break in the block */ 833 flg = PETSC_FALSE; 834 break; 835 } 836 } 837 if (flg == PETSC_TRUE) { /* No break in the bs */ 838 sizes[j] = bs; 839 i+= bs; 840 } else { 841 sizes[j] = 1; 842 i++; 843 } 844 } 845 } 846 *bs_max = j; 847 PetscFunctionReturn(0); 848 } 849 850 #undef __FUNCT__ 851 #define __FUNCT__ "MatZeroRows_SeqSBAIJ" 852 int MatZeroRows_SeqSBAIJ(Mat A,IS is,PetscScalar *diag) 853 { 854 PetscFunctionBegin; 855 SETERRQ(PETSC_ERR_SUP,"No support for this function yet"); 856 } 857 858 /* Only add/insert a(i,j) with i<=j (blocks). 859 Any a(i,j) with i>j input by user is ingored. 860 */ 861 862 #undef __FUNCT__ 863 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 864 int MatSetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 865 { 866 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 867 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 868 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 869 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 870 int ridx,cidx,bs2=a->bs2,ierr; 871 MatScalar *ap,value,*aa=a->a,*bap; 872 873 PetscFunctionBegin; 874 875 for (k=0; k<m; k++) { /* loop over added rows */ 876 row = im[k]; /* row number */ 877 brow = row/bs; /* block row number */ 878 if (row < 0) continue; 879 #if defined(PETSC_USE_BOPT_g) 880 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 881 #endif 882 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 883 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 884 rmax = imax[brow]; /* maximum space allocated for this row */ 885 nrow = ailen[brow]; /* actual length of this row */ 886 low = 0; 887 888 for (l=0; l<n; l++) { /* loop over added columns */ 889 if (in[l] < 0) continue; 890 #if defined(PETSC_USE_BOPT_g) 891 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m); 892 #endif 893 col = in[l]; 894 bcol = col/bs; /* block col number */ 895 896 if (brow <= bcol){ 897 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 898 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 899 /* element value a(k,l) */ 900 if (roworiented) { 901 value = v[l + k*n]; 902 } else { 903 value = v[k + l*m]; 904 } 905 906 /* move pointer bap to a(k,l) quickly and add/insert value */ 907 if (!sorted) low = 0; high = nrow; 908 while (high-low > 7) { 909 t = (low+high)/2; 910 if (rp[t] > bcol) high = t; 911 else low = t; 912 } 913 for (i=low; i<high; i++) { 914 /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */ 915 if (rp[i] > bcol) break; 916 if (rp[i] == bcol) { 917 bap = ap + bs2*i + bs*cidx + ridx; 918 if (is == ADD_VALUES) *bap += value; 919 else *bap = value; 920 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 921 if (brow == bcol && ridx < cidx){ 922 bap = ap + bs2*i + bs*ridx + cidx; 923 if (is == ADD_VALUES) *bap += value; 924 else *bap = value; 925 } 926 goto noinsert1; 927 } 928 } 929 930 if (nonew == 1) goto noinsert1; 931 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 932 if (nrow >= rmax) { 933 /* there is no extra room in row, therefore enlarge */ 934 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 935 MatScalar *new_a; 936 937 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 938 939 /* Malloc new storage space */ 940 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 941 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 942 new_j = (int*)(new_a + bs2*new_nz); 943 new_i = new_j + new_nz; 944 945 /* copy over old data into new slots */ 946 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 947 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 948 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 949 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 950 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 951 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 952 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 953 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 954 /* free up old matrix storage */ 955 ierr = PetscFree(a->a);CHKERRQ(ierr); 956 if (!a->singlemalloc) { 957 ierr = PetscFree(a->i);CHKERRQ(ierr); 958 ierr = PetscFree(a->j);CHKERRQ(ierr); 959 } 960 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 961 a->singlemalloc = PETSC_TRUE; 962 963 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 964 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 965 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 966 a->s_maxnz += bs2*CHUNKSIZE; 967 a->reallocs++; 968 a->s_nz++; 969 } 970 971 N = nrow++ - 1; 972 /* shift up all the later entries in this row */ 973 for (ii=N; ii>=i; ii--) { 974 rp[ii+1] = rp[ii]; 975 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 976 } 977 if (N>=i) { 978 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 979 } 980 rp[i] = bcol; 981 ap[bs2*i + bs*cidx + ridx] = value; 982 noinsert1:; 983 low = i; 984 /* } */ 985 } 986 } /* end of if .. if.. */ 987 } /* end of loop over added columns */ 988 ailen[brow] = nrow; 989 } /* end of loop over added rows */ 990 991 PetscFunctionReturn(0); 992 } 993 994 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,PetscReal,Mat*); 995 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,PetscReal); 996 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS*,int); 997 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 998 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**); 999 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec); 1000 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec); 1001 extern int MatScale_SeqSBAIJ(PetscScalar*,Mat); 1002 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 1003 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 1004 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec); 1005 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 1006 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 1007 extern int MatZeroEntries_SeqSBAIJ(Mat); 1008 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec); 1009 1010 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 1011 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 1012 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 1013 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 1014 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 1015 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 1016 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 1017 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 1018 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec); 1019 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec); 1020 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec); 1021 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec); 1022 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec); 1023 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec); 1024 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec); 1025 1026 extern int MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs); 1027 1028 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 1029 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 1030 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 1031 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 1032 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 1033 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 1034 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 1035 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 1036 1037 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*); 1038 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*); 1039 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*); 1040 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*); 1041 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*); 1042 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*); 1043 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*); 1044 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*); 1045 extern int MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*); 1046 1047 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 1048 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 1049 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 1050 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 1051 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 1052 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 1053 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 1054 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 1055 1056 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 1057 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 1058 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 1059 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 1060 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 1061 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 1062 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 1063 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 1064 1065 #ifdef HAVE_MatICCFactor 1066 /* modefied from MatILUFactor_SeqSBAIJ, needs further work! */ 1067 #undef __FUNCT__ 1068 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 1069 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,PetscReal fill,int level) 1070 { 1071 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 1072 Mat outA; 1073 int ierr; 1074 PetscTruth row_identity,col_identity; 1075 1076 PetscFunctionBegin; 1077 /* 1078 if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1079 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1080 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1081 if (!row_identity || !col_identity) { 1082 SETERRQ(1,"Row and column permutations must be identity for in-place ICC"); 1083 } 1084 */ 1085 1086 outA = inA; 1087 inA->factor = FACTOR_CHOLESKY; 1088 1089 if (!a->diag) { 1090 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 1091 } 1092 /* 1093 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1094 for ILU(0) factorization with natural ordering 1095 */ 1096 switch (a->bs) { 1097 case 1: 1098 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1099 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1100 inA->ops->solves = MatSolves_SeqSBAIJ_1; 1101 PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1102 case 2: 1103 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1104 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1105 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1106 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1107 break; 1108 case 3: 1109 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1110 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1111 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1112 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1113 break; 1114 case 4: 1115 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1116 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1117 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1118 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1119 break; 1120 case 5: 1121 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1122 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1123 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1124 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1125 break; 1126 case 6: 1127 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1128 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1129 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1130 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1131 break; 1132 case 7: 1133 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1134 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1135 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1136 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1137 break; 1138 default: 1139 a->row = row; 1140 a->icol = col; 1141 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1142 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1143 1144 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1145 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1146 PetscLogObjectParent(inA,a->icol); 1147 1148 if (!a->solve_work) { 1149 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1150 PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar)); 1151 } 1152 } 1153 1154 ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr); 1155 1156 PetscFunctionReturn(0); 1157 } 1158 #endif 1159 1160 #undef __FUNCT__ 1161 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1162 int MatPrintHelp_SeqSBAIJ(Mat A) 1163 { 1164 static PetscTruth called = PETSC_FALSE; 1165 MPI_Comm comm = A->comm; 1166 int ierr; 1167 1168 PetscFunctionBegin; 1169 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1170 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1171 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1172 PetscFunctionReturn(0); 1173 } 1174 1175 EXTERN_C_BEGIN 1176 #undef __FUNCT__ 1177 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1178 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices) 1179 { 1180 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1181 int i,nz,n; 1182 1183 PetscFunctionBegin; 1184 nz = baij->s_maxnz; 1185 n = mat->n; 1186 for (i=0; i<nz; i++) { 1187 baij->j[i] = indices[i]; 1188 } 1189 baij->s_nz = nz; 1190 for (i=0; i<n; i++) { 1191 baij->ilen[i] = baij->imax[i]; 1192 } 1193 1194 PetscFunctionReturn(0); 1195 } 1196 EXTERN_C_END 1197 1198 #undef __FUNCT__ 1199 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1200 /*@ 1201 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1202 in the matrix. 1203 1204 Input Parameters: 1205 + mat - the SeqSBAIJ matrix 1206 - indices - the column indices 1207 1208 Level: advanced 1209 1210 Notes: 1211 This can be called if you have precomputed the nonzero structure of the 1212 matrix and want to provide it to the matrix object to improve the performance 1213 of the MatSetValues() operation. 1214 1215 You MUST have set the correct numbers of nonzeros per row in the call to 1216 MatCreateSeqSBAIJ(). 1217 1218 MUST be called before any calls to MatSetValues() 1219 1220 .seealso: MatCreateSeqSBAIJ 1221 @*/ 1222 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices) 1223 { 1224 int ierr,(*f)(Mat,int *); 1225 1226 PetscFunctionBegin; 1227 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1228 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1229 if (f) { 1230 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1231 } else { 1232 SETERRQ(1,"Wrong type of matrix to set column indices"); 1233 } 1234 PetscFunctionReturn(0); 1235 } 1236 1237 #undef __FUNCT__ 1238 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1239 int MatSetUpPreallocation_SeqSBAIJ(Mat A) 1240 { 1241 int ierr; 1242 1243 PetscFunctionBegin; 1244 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1245 PetscFunctionReturn(0); 1246 } 1247 1248 #undef __FUNCT__ 1249 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1250 int MatGetArray_SeqSBAIJ(Mat A,PetscScalar **array) 1251 { 1252 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1253 PetscFunctionBegin; 1254 *array = a->a; 1255 PetscFunctionReturn(0); 1256 } 1257 1258 #undef __FUNCT__ 1259 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1260 int MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar **array) 1261 { 1262 PetscFunctionBegin; 1263 PetscFunctionReturn(0); 1264 } 1265 1266 #include "petscblaslapack.h" 1267 #undef __FUNCT__ 1268 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1269 int MatAXPY_SeqSBAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str) 1270 { 1271 int ierr,one=1; 1272 Mat_SeqSBAIJ *x = (Mat_SeqSBAIJ *)X->data,*y = (Mat_SeqSBAIJ *)Y->data; 1273 1274 PetscFunctionBegin; 1275 if (str == SAME_NONZERO_PATTERN) { 1276 BLaxpy_(&x->s_nz,a,x->a,&one,y->a,&one); 1277 } else { 1278 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1279 } 1280 PetscFunctionReturn(0); 1281 } 1282 1283 /* -------------------------------------------------------------------*/ 1284 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1285 MatGetRow_SeqSBAIJ, 1286 MatRestoreRow_SeqSBAIJ, 1287 MatMult_SeqSBAIJ_N, 1288 MatMultAdd_SeqSBAIJ_N, 1289 MatMultTranspose_SeqSBAIJ, 1290 MatMultTransposeAdd_SeqSBAIJ, 1291 MatSolve_SeqSBAIJ_N, 1292 0, 1293 0, 1294 0, 1295 0, 1296 MatCholeskyFactor_SeqSBAIJ, 1297 MatRelax_SeqSBAIJ, 1298 MatTranspose_SeqSBAIJ, 1299 MatGetInfo_SeqSBAIJ, 1300 MatEqual_SeqSBAIJ, 1301 MatGetDiagonal_SeqSBAIJ, 1302 MatDiagonalScale_SeqSBAIJ, 1303 MatNorm_SeqSBAIJ, 1304 0, 1305 MatAssemblyEnd_SeqSBAIJ, 1306 0, 1307 MatSetOption_SeqSBAIJ, 1308 MatZeroEntries_SeqSBAIJ, 1309 MatZeroRows_SeqSBAIJ, 1310 0, 1311 0, 1312 MatCholeskyFactorSymbolic_SeqSBAIJ, 1313 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1314 MatSetUpPreallocation_SeqSBAIJ, 1315 0, 1316 MatICCFactorSymbolic_SeqSBAIJ, 1317 MatGetArray_SeqSBAIJ, 1318 MatRestoreArray_SeqSBAIJ, 1319 MatDuplicate_SeqSBAIJ, 1320 0, 1321 0, 1322 0, 1323 0, 1324 MatAXPY_SeqSBAIJ, 1325 MatGetSubMatrices_SeqSBAIJ, 1326 MatIncreaseOverlap_SeqSBAIJ, 1327 MatGetValues_SeqSBAIJ, 1328 0, 1329 MatPrintHelp_SeqSBAIJ, 1330 MatScale_SeqSBAIJ, 1331 0, 1332 0, 1333 0, 1334 MatGetBlockSize_SeqSBAIJ, 1335 MatGetRowIJ_SeqSBAIJ, 1336 MatRestoreRowIJ_SeqSBAIJ, 1337 0, 1338 0, 1339 0, 1340 0, 1341 0, 1342 0, 1343 MatSetValuesBlocked_SeqSBAIJ, 1344 MatGetSubMatrix_SeqSBAIJ, 1345 0, 1346 0, 1347 MatGetPetscMaps_Petsc, 1348 0, 1349 0, 1350 0, 1351 0, 1352 0, 1353 0, 1354 MatGetRowMax_SeqSBAIJ, 1355 0, 1356 0, 1357 0, 1358 0, 1359 0, 1360 0, 1361 0, 1362 0, 1363 0, 1364 0, 1365 0, 1366 0, 1367 0, 1368 #if !defined(PETSC_USE_COMPLEX) 1369 MatGetInertia_SeqSBAIJ 1370 #else 1371 0 1372 #endif 1373 }; 1374 1375 EXTERN_C_BEGIN 1376 #undef __FUNCT__ 1377 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1378 int MatStoreValues_SeqSBAIJ(Mat mat) 1379 { 1380 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1381 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1382 int ierr; 1383 1384 PetscFunctionBegin; 1385 if (aij->nonew != 1) { 1386 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1387 } 1388 1389 /* allocate space for values if not already there */ 1390 if (!aij->saved_values) { 1391 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1392 } 1393 1394 /* copy values over */ 1395 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1396 PetscFunctionReturn(0); 1397 } 1398 EXTERN_C_END 1399 1400 EXTERN_C_BEGIN 1401 #undef __FUNCT__ 1402 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1403 int MatRetrieveValues_SeqSBAIJ(Mat mat) 1404 { 1405 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1406 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1407 1408 PetscFunctionBegin; 1409 if (aij->nonew != 1) { 1410 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1411 } 1412 if (!aij->saved_values) { 1413 SETERRQ(1,"Must call MatStoreValues(A);first"); 1414 } 1415 1416 /* copy values over */ 1417 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1418 PetscFunctionReturn(0); 1419 } 1420 EXTERN_C_END 1421 1422 EXTERN_C_BEGIN 1423 #undef __FUNCT__ 1424 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1425 int MatCreate_SeqSBAIJ(Mat B) 1426 { 1427 Mat_SeqSBAIJ *b; 1428 int ierr,size; 1429 1430 PetscFunctionBegin; 1431 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1432 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1433 B->m = B->M = PetscMax(B->m,B->M); 1434 B->n = B->N = PetscMax(B->n,B->N); 1435 1436 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1437 B->data = (void*)b; 1438 ierr = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1439 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1440 B->ops->destroy = MatDestroy_SeqSBAIJ; 1441 B->ops->view = MatView_SeqSBAIJ; 1442 B->factor = 0; 1443 B->lupivotthreshold = 1.0; 1444 B->mapping = 0; 1445 b->row = 0; 1446 b->icol = 0; 1447 b->reallocs = 0; 1448 b->saved_values = 0; 1449 1450 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1451 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1452 1453 b->sorted = PETSC_FALSE; 1454 b->roworiented = PETSC_TRUE; 1455 b->nonew = 0; 1456 b->diag = 0; 1457 b->solve_work = 0; 1458 b->mult_work = 0; 1459 B->spptr = 0; 1460 b->keepzeroedrows = PETSC_FALSE; 1461 1462 b->inew = 0; 1463 b->jnew = 0; 1464 b->anew = 0; 1465 b->a2anew = 0; 1466 b->permute = PETSC_FALSE; 1467 1468 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1469 "MatStoreValues_SeqSBAIJ", 1470 (void*)MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1471 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1472 "MatRetrieveValues_SeqSBAIJ", 1473 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1474 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1475 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1476 (void*)MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1477 PetscFunctionReturn(0); 1478 } 1479 EXTERN_C_END 1480 1481 #undef __FUNCT__ 1482 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1483 /*@C 1484 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1485 compressed row) format. For good matrix assembly performance the 1486 user should preallocate the matrix storage by setting the parameter nz 1487 (or the array nnz). By setting these parameters accurately, performance 1488 during matrix assembly can be increased by more than a factor of 50. 1489 1490 Collective on Mat 1491 1492 Input Parameters: 1493 + A - the symmetric matrix 1494 . bs - size of block 1495 . nz - number of block nonzeros per block row (same for all rows) 1496 - nnz - array containing the number of block nonzeros in the upper triangular plus 1497 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1498 1499 Options Database Keys: 1500 . -mat_no_unroll - uses code that does not unroll the loops in the 1501 block calculations (much slower) 1502 . -mat_block_size - size of the blocks to use 1503 1504 Level: intermediate 1505 1506 Notes: 1507 Specify the preallocated storage with either nz or nnz (not both). 1508 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1509 allocation. For additional details, see the users manual chapter on 1510 matrices. 1511 1512 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1513 @*/ 1514 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1515 { 1516 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1517 int i,len,ierr,mbs,bs2; 1518 PetscTruth flg; 1519 int s_nz; 1520 1521 PetscFunctionBegin; 1522 B->preallocated = PETSC_TRUE; 1523 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1524 mbs = B->m/bs; 1525 bs2 = bs*bs; 1526 1527 if (mbs*bs != B->m) { 1528 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1529 } 1530 1531 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1532 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1533 if (nnz) { 1534 for (i=0; i<mbs; i++) { 1535 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1536 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs); 1537 } 1538 } 1539 1540 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1541 if (!flg) { 1542 switch (bs) { 1543 case 1: 1544 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1545 B->ops->solve = MatSolve_SeqSBAIJ_1; 1546 B->ops->solves = MatSolves_SeqSBAIJ_1; 1547 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_1; 1548 B->ops->mult = MatMult_SeqSBAIJ_1; 1549 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1550 break; 1551 case 2: 1552 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1553 B->ops->solve = MatSolve_SeqSBAIJ_2; 1554 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_2; 1555 B->ops->mult = MatMult_SeqSBAIJ_2; 1556 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1557 break; 1558 case 3: 1559 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1560 B->ops->solve = MatSolve_SeqSBAIJ_3; 1561 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_3; 1562 B->ops->mult = MatMult_SeqSBAIJ_3; 1563 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1564 break; 1565 case 4: 1566 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1567 B->ops->solve = MatSolve_SeqSBAIJ_4; 1568 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_4; 1569 B->ops->mult = MatMult_SeqSBAIJ_4; 1570 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1571 break; 1572 case 5: 1573 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1574 B->ops->solve = MatSolve_SeqSBAIJ_5; 1575 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_5; 1576 B->ops->mult = MatMult_SeqSBAIJ_5; 1577 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1578 break; 1579 case 6: 1580 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1581 B->ops->solve = MatSolve_SeqSBAIJ_6; 1582 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_6; 1583 B->ops->mult = MatMult_SeqSBAIJ_6; 1584 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1585 break; 1586 case 7: 1587 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1588 B->ops->solve = MatSolve_SeqSBAIJ_7; 1589 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_7; 1590 B->ops->mult = MatMult_SeqSBAIJ_7; 1591 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1592 break; 1593 } 1594 } 1595 1596 b->mbs = mbs; 1597 b->nbs = mbs; 1598 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1599 if (!nnz) { 1600 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1601 else if (nz <= 0) nz = 1; 1602 for (i=0; i<mbs; i++) { 1603 b->imax[i] = nz; 1604 } 1605 nz = nz*mbs; /* total nz */ 1606 } else { 1607 nz = 0; 1608 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1609 } 1610 /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1611 s_nz = nz; 1612 1613 /* allocate the matrix space */ 1614 len = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1615 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1616 ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1617 b->j = (int*)(b->a + s_nz*bs2); 1618 ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr); 1619 b->i = b->j + s_nz; 1620 b->singlemalloc = PETSC_TRUE; 1621 1622 /* pointer to beginning of each row */ 1623 b->i[0] = 0; 1624 for (i=1; i<mbs+1; i++) { 1625 b->i[i] = b->i[i-1] + b->imax[i-1]; 1626 } 1627 1628 /* b->ilen will count nonzeros in each block row so far. */ 1629 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1630 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1631 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1632 1633 b->bs = bs; 1634 b->bs2 = bs2; 1635 b->s_nz = 0; 1636 b->s_maxnz = s_nz*bs2; 1637 1638 b->inew = 0; 1639 b->jnew = 0; 1640 b->anew = 0; 1641 b->a2anew = 0; 1642 b->permute = PETSC_FALSE; 1643 PetscFunctionReturn(0); 1644 } 1645 1646 1647 #undef __FUNCT__ 1648 #define __FUNCT__ "MatCreateSeqSBAIJ" 1649 /*@C 1650 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1651 compressed row) format. For good matrix assembly performance the 1652 user should preallocate the matrix storage by setting the parameter nz 1653 (or the array nnz). By setting these parameters accurately, performance 1654 during matrix assembly can be increased by more than a factor of 50. 1655 1656 Collective on MPI_Comm 1657 1658 Input Parameters: 1659 + comm - MPI communicator, set to PETSC_COMM_SELF 1660 . bs - size of block 1661 . m - number of rows, or number of columns 1662 . nz - number of block nonzeros per block row (same for all rows) 1663 - nnz - array containing the number of block nonzeros in the upper triangular plus 1664 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1665 1666 Output Parameter: 1667 . A - the symmetric matrix 1668 1669 Options Database Keys: 1670 . -mat_no_unroll - uses code that does not unroll the loops in the 1671 block calculations (much slower) 1672 . -mat_block_size - size of the blocks to use 1673 1674 Level: intermediate 1675 1676 Notes: 1677 1678 Specify the preallocated storage with either nz or nnz (not both). 1679 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1680 allocation. For additional details, see the users manual chapter on 1681 matrices. 1682 1683 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1684 @*/ 1685 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1686 { 1687 int ierr; 1688 1689 PetscFunctionBegin; 1690 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1691 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1692 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1693 PetscFunctionReturn(0); 1694 } 1695 1696 #undef __FUNCT__ 1697 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1698 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1699 { 1700 Mat C; 1701 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1702 int i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr; 1703 1704 PetscFunctionBegin; 1705 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1706 1707 *B = 0; 1708 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1709 ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr); 1710 c = (Mat_SeqSBAIJ*)C->data; 1711 1712 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1713 C->preallocated = PETSC_TRUE; 1714 C->factor = A->factor; 1715 c->row = 0; 1716 c->icol = 0; 1717 c->saved_values = 0; 1718 c->keepzeroedrows = a->keepzeroedrows; 1719 C->assembled = PETSC_TRUE; 1720 1721 c->bs = a->bs; 1722 c->bs2 = a->bs2; 1723 c->mbs = a->mbs; 1724 c->nbs = a->nbs; 1725 1726 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1727 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1728 for (i=0; i<mbs; i++) { 1729 c->imax[i] = a->imax[i]; 1730 c->ilen[i] = a->ilen[i]; 1731 } 1732 1733 /* allocate the matrix space */ 1734 c->singlemalloc = PETSC_TRUE; 1735 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1736 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1737 c->j = (int*)(c->a + nz*bs2); 1738 c->i = c->j + nz; 1739 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1740 if (mbs > 0) { 1741 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1742 if (cpvalues == MAT_COPY_VALUES) { 1743 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1744 } else { 1745 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1746 } 1747 } 1748 1749 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1750 c->sorted = a->sorted; 1751 c->roworiented = a->roworiented; 1752 c->nonew = a->nonew; 1753 1754 if (a->diag) { 1755 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1756 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1757 for (i=0; i<mbs; i++) { 1758 c->diag[i] = a->diag[i]; 1759 } 1760 } else c->diag = 0; 1761 c->s_nz = a->s_nz; 1762 c->s_maxnz = a->s_maxnz; 1763 c->solve_work = 0; 1764 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1765 c->mult_work = 0; 1766 *B = C; 1767 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1768 PetscFunctionReturn(0); 1769 } 1770 1771 EXTERN_C_BEGIN 1772 #undef __FUNCT__ 1773 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1774 int MatLoad_SeqSBAIJ(PetscViewer viewer,MatType type,Mat *A) 1775 { 1776 Mat_SeqSBAIJ *a; 1777 Mat B; 1778 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1779 int *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1780 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1781 int *masked,nmask,tmp,bs2,ishift; 1782 PetscScalar *aa; 1783 MPI_Comm comm = ((PetscObject)viewer)->comm; 1784 1785 PetscFunctionBegin; 1786 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1787 bs2 = bs*bs; 1788 1789 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1790 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1791 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1792 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1793 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1794 M = header[1]; N = header[2]; nz = header[3]; 1795 1796 if (header[3] < 0) { 1797 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1798 } 1799 1800 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1801 1802 /* 1803 This code adds extra rows to make sure the number of rows is 1804 divisible by the blocksize 1805 */ 1806 mbs = M/bs; 1807 extra_rows = bs - M + bs*(mbs); 1808 if (extra_rows == bs) extra_rows = 0; 1809 else mbs++; 1810 if (extra_rows) { 1811 PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"); 1812 } 1813 1814 /* read in row lengths */ 1815 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1816 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1817 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1818 1819 /* read in column indices */ 1820 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1821 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1822 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1823 1824 /* loop over row lengths determining block row lengths */ 1825 ierr = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr); 1826 ierr = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1827 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1828 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1829 masked = mask + mbs; 1830 rowcount = 0; nzcount = 0; 1831 for (i=0; i<mbs; i++) { 1832 nmask = 0; 1833 for (j=0; j<bs; j++) { 1834 kmax = rowlengths[rowcount]; 1835 for (k=0; k<kmax; k++) { 1836 tmp = jj[nzcount++]/bs; /* block col. index */ 1837 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1838 } 1839 rowcount++; 1840 } 1841 s_browlengths[i] += nmask; 1842 1843 /* zero out the mask elements we set */ 1844 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1845 } 1846 1847 /* create our matrix */ 1848 ierr = MatCreateSeqSBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,s_browlengths,A);CHKERRQ(ierr); 1849 B = *A; 1850 a = (Mat_SeqSBAIJ*)B->data; 1851 1852 /* set matrix "i" values */ 1853 a->i[0] = 0; 1854 for (i=1; i<= mbs; i++) { 1855 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1856 a->ilen[i-1] = s_browlengths[i-1]; 1857 } 1858 a->s_nz = a->i[mbs]; 1859 1860 /* read in nonzero values */ 1861 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1862 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1863 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1864 1865 /* set "a" and "j" values into matrix */ 1866 nzcount = 0; jcount = 0; 1867 for (i=0; i<mbs; i++) { 1868 nzcountb = nzcount; 1869 nmask = 0; 1870 for (j=0; j<bs; j++) { 1871 kmax = rowlengths[i*bs+j]; 1872 for (k=0; k<kmax; k++) { 1873 tmp = jj[nzcount++]/bs; /* block col. index */ 1874 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1875 } 1876 } 1877 /* sort the masked values */ 1878 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1879 1880 /* set "j" values into matrix */ 1881 maskcount = 1; 1882 for (j=0; j<nmask; j++) { 1883 a->j[jcount++] = masked[j]; 1884 mask[masked[j]] = maskcount++; 1885 } 1886 1887 /* set "a" values into matrix */ 1888 ishift = bs2*a->i[i]; 1889 for (j=0; j<bs; j++) { 1890 kmax = rowlengths[i*bs+j]; 1891 for (k=0; k<kmax; k++) { 1892 tmp = jj[nzcountb]/bs ; /* block col. index */ 1893 if (tmp >= i){ 1894 block = mask[tmp] - 1; 1895 point = jj[nzcountb] - bs*tmp; 1896 idx = ishift + bs2*block + j + bs*point; 1897 a->a[idx] = aa[nzcountb]; 1898 } 1899 nzcountb++; 1900 } 1901 } 1902 /* zero out the mask elements we set */ 1903 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1904 } 1905 if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1906 1907 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1908 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1909 ierr = PetscFree(aa);CHKERRQ(ierr); 1910 ierr = PetscFree(jj);CHKERRQ(ierr); 1911 ierr = PetscFree(mask);CHKERRQ(ierr); 1912 1913 B->assembled = PETSC_TRUE; 1914 ierr = MatView_Private(B);CHKERRQ(ierr); 1915 PetscFunctionReturn(0); 1916 } 1917 EXTERN_C_END 1918 1919 #undef __FUNCT__ 1920 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1921 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1922 { 1923 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1924 MatScalar *aa=a->a,*v,*v1; 1925 PetscScalar *x,*b,*t,sum,d; 1926 int m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr; 1927 int nz,nz1,*vj,*vj1,i; 1928 1929 PetscFunctionBegin; 1930 its = its*lits; 1931 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1932 1933 if (bs > 1) 1934 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1935 1936 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1937 if (xx != bb) { 1938 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1939 } else { 1940 b = x; 1941 } 1942 1943 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1944 1945 if (flag & SOR_ZERO_INITIAL_GUESS) { 1946 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1947 for (i=0; i<m; i++) 1948 t[i] = b[i]; 1949 1950 for (i=0; i<m; i++){ 1951 d = *(aa + ai[i]); /* diag[i] */ 1952 v = aa + ai[i] + 1; 1953 vj = aj + ai[i] + 1; 1954 nz = ai[i+1] - ai[i] - 1; 1955 x[i] = omega*t[i]/d; 1956 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 1957 PetscLogFlops(2*nz-1); 1958 } 1959 } 1960 1961 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1962 for (i=0; i<m; i++) 1963 t[i] = b[i]; 1964 1965 for (i=0; i<m-1; i++){ /* update rhs */ 1966 v = aa + ai[i] + 1; 1967 vj = aj + ai[i] + 1; 1968 nz = ai[i+1] - ai[i] - 1; 1969 while (nz--) t[*vj++] -= x[i]*(*v++); 1970 PetscLogFlops(2*nz-1); 1971 } 1972 for (i=m-1; i>=0; i--){ 1973 d = *(aa + ai[i]); 1974 v = aa + ai[i] + 1; 1975 vj = aj + ai[i] + 1; 1976 nz = ai[i+1] - ai[i] - 1; 1977 sum = t[i]; 1978 while (nz--) sum -= x[*vj++]*(*v++); 1979 PetscLogFlops(2*nz-1); 1980 x[i] = (1-omega)*x[i] + omega*sum/d; 1981 } 1982 } 1983 its--; 1984 } 1985 1986 while (its--) { 1987 /* 1988 forward sweep: 1989 for i=0,...,m-1: 1990 sum[i] = (b[i] - U(i,:)x )/d[i]; 1991 x[i] = (1-omega)x[i] + omega*sum[i]; 1992 b = b - x[i]*U^T(i,:); 1993 1994 */ 1995 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1996 for (i=0; i<m; i++) 1997 t[i] = b[i]; 1998 1999 for (i=0; i<m; i++){ 2000 d = *(aa + ai[i]); /* diag[i] */ 2001 v = aa + ai[i] + 1; v1=v; 2002 vj = aj + ai[i] + 1; vj1=vj; 2003 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2004 sum = t[i]; 2005 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2006 x[i] = (1-omega)*x[i] + omega*sum/d; 2007 while (nz--) t[*vj++] -= x[i]*(*v++); 2008 PetscLogFlops(4*nz-2); 2009 } 2010 } 2011 2012 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2013 /* 2014 backward sweep: 2015 b = b - x[i]*U^T(i,:), i=0,...,n-2 2016 for i=m-1,...,0: 2017 sum[i] = (b[i] - U(i,:)x )/d[i]; 2018 x[i] = (1-omega)x[i] + omega*sum[i]; 2019 */ 2020 for (i=0; i<m; i++) 2021 t[i] = b[i]; 2022 2023 for (i=0; i<m-1; i++){ /* update rhs */ 2024 v = aa + ai[i] + 1; 2025 vj = aj + ai[i] + 1; 2026 nz = ai[i+1] - ai[i] - 1; 2027 while (nz--) t[*vj++] -= x[i]*(*v++); 2028 PetscLogFlops(2*nz-1); 2029 } 2030 for (i=m-1; i>=0; i--){ 2031 d = *(aa + ai[i]); 2032 v = aa + ai[i] + 1; 2033 vj = aj + ai[i] + 1; 2034 nz = ai[i+1] - ai[i] - 1; 2035 sum = t[i]; 2036 while (nz--) sum -= x[*vj++]*(*v++); 2037 PetscLogFlops(2*nz-1); 2038 x[i] = (1-omega)*x[i] + omega*sum/d; 2039 } 2040 } 2041 } 2042 2043 ierr = PetscFree(t); CHKERRQ(ierr); 2044 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2045 if (bb != xx) { 2046 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2047 } 2048 2049 PetscFunctionReturn(0); 2050 } 2051 2052 2053 2054 2055 2056 2057