1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/ 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "src/mat/impls/sbaij/seq/sbaij.h" 11 12 #define CHUNKSIZE 10 13 14 /* 15 Checks for missing diagonals 16 */ 17 #undef __FUNCT__ 18 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 19 int MatMissingDiagonal_SeqSBAIJ(Mat A) 20 { 21 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 22 int *diag,*jj = a->j,i,ierr; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for (i=0; i<a->mbs; i++) { 28 if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i); 29 } 30 PetscFunctionReturn(0); 31 } 32 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 35 int MatMarkDiagonal_SeqSBAIJ(Mat A) 36 { 37 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 38 int i,mbs = a->mbs,ierr; 39 40 PetscFunctionBegin; 41 if (a->diag) PetscFunctionReturn(0); 42 43 ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr); 44 PetscLogObjectMemory(A,(mbs+1)*sizeof(int)); 45 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 #undef __FUNCT__ 50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 51 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 52 { 53 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 54 int n = a->mbs,i; 55 56 PetscFunctionBegin; 57 *nn = n; 58 if (!ia) PetscFunctionReturn(0); 59 60 if (oshift == 1) { 61 /* temporarily add 1 to i and j indices */ 62 int s_nz = a->i[n]; 63 for (i=0; i<s_nz; i++) a->j[i]++; 64 for (i=0; i<n+1; i++) a->i[i]++; 65 *ia = a->i; *ja = a->j; 66 } else { 67 *ia = a->i; *ja = a->j; 68 } 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 74 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 75 { 76 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 77 int i,n = a->mbs,ierr; 78 79 PetscFunctionBegin; 80 if (!ia) PetscFunctionReturn(0); 81 82 if (oshift == 1) { 83 int s_nz = a->i[n]-1; 84 for (i=0; i<s_nz; i++) a->j[i]--; 85 for (i=0; i<n+1; i++) a->i[i]--; 86 } 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ" 92 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs) 93 { 94 Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data; 95 96 PetscFunctionBegin; 97 *bs = sbaij->bs; 98 PetscFunctionReturn(0); 99 } 100 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 103 int MatDestroy_SeqSBAIJ(Mat A) 104 { 105 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 106 int ierr; 107 108 PetscFunctionBegin; 109 #if defined(PETSC_USE_LOG) 110 PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz); 111 #endif 112 ierr = PetscFree(a->a);CHKERRQ(ierr); 113 if (!a->singlemalloc) { 114 ierr = PetscFree(a->i);CHKERRQ(ierr); 115 ierr = PetscFree(a->j);CHKERRQ(ierr); 116 } 117 if (a->row) { 118 ierr = ISDestroy(a->row);CHKERRQ(ierr); 119 } 120 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 121 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 122 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 123 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 124 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 125 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 126 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 127 128 if (a->inew){ 129 ierr = PetscFree(a->inew);CHKERRQ(ierr); 130 a->inew = 0; 131 } 132 ierr = PetscFree(a);CHKERRQ(ierr); 133 PetscFunctionReturn(0); 134 } 135 136 #undef __FUNCT__ 137 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 138 int MatSetOption_SeqSBAIJ(Mat A,MatOption op) 139 { 140 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 141 142 PetscFunctionBegin; 143 switch (op) { 144 case MAT_ROW_ORIENTED: 145 a->roworiented = PETSC_TRUE; 146 break; 147 case MAT_COLUMN_ORIENTED: 148 a->roworiented = PETSC_FALSE; 149 break; 150 case MAT_COLUMNS_SORTED: 151 a->sorted = PETSC_TRUE; 152 break; 153 case MAT_COLUMNS_UNSORTED: 154 a->sorted = PETSC_FALSE; 155 break; 156 case MAT_KEEP_ZEROED_ROWS: 157 a->keepzeroedrows = PETSC_TRUE; 158 break; 159 case MAT_NO_NEW_NONZERO_LOCATIONS: 160 a->nonew = 1; 161 break; 162 case MAT_NEW_NONZERO_LOCATION_ERR: 163 a->nonew = -1; 164 break; 165 case MAT_NEW_NONZERO_ALLOCATION_ERR: 166 a->nonew = -2; 167 break; 168 case MAT_YES_NEW_NONZERO_LOCATIONS: 169 a->nonew = 0; 170 break; 171 case MAT_ROWS_SORTED: 172 case MAT_ROWS_UNSORTED: 173 case MAT_YES_NEW_DIAGONALS: 174 case MAT_IGNORE_OFF_PROC_ENTRIES: 175 case MAT_USE_HASH_TABLE: 176 case MAT_USE_SINGLE_PRECISION_SOLVES: 177 PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n"); 178 break; 179 case MAT_NO_NEW_DIAGONALS: 180 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 181 default: 182 SETERRQ(PETSC_ERR_SUP,"unknown option"); 183 } 184 PetscFunctionReturn(0); 185 } 186 187 #undef __FUNCT__ 188 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 189 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v) 190 { 191 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 192 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr; 193 MatScalar *aa,*aa_i; 194 PetscScalar *v_i; 195 196 PetscFunctionBegin; 197 bs = a->bs; 198 ai = a->i; 199 aj = a->j; 200 aa = a->a; 201 bs2 = a->bs2; 202 203 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 204 205 bn = row/bs; /* Block number */ 206 bp = row % bs; /* Block position */ 207 M = ai[bn+1] - ai[bn]; 208 *ncols = bs*M; 209 210 if (v) { 211 *v = 0; 212 if (*ncols) { 213 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 214 for (i=0; i<M; i++) { /* for each block in the block row */ 215 v_i = *v + i*bs; 216 aa_i = aa + bs2*(ai[bn] + i); 217 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 218 } 219 } 220 } 221 222 if (cols) { 223 *cols = 0; 224 if (*ncols) { 225 ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr); 226 for (i=0; i<M; i++) { /* for each block in the block row */ 227 cols_i = *cols + i*bs; 228 itmp = bs*aj[ai[bn] + i]; 229 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 230 } 231 } 232 } 233 234 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 235 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 236 #ifdef column_search 237 v_i = *v + M*bs; 238 cols_i = *cols + M*bs; 239 for (i=0; i<bn; i++){ /* for each block row */ 240 M = ai[i+1] - ai[i]; 241 for (j=0; j<M; j++){ 242 itmp = aj[ai[i] + j]; /* block column value */ 243 if (itmp == bn){ 244 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 245 for (k=0; k<bs; k++) { 246 *cols_i++ = i*bs+k; 247 *v_i++ = aa_i[k]; 248 } 249 *ncols += bs; 250 break; 251 } 252 } 253 } 254 #endif 255 256 PetscFunctionReturn(0); 257 } 258 259 #undef __FUNCT__ 260 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 261 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 262 { 263 int ierr; 264 265 PetscFunctionBegin; 266 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 267 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 268 PetscFunctionReturn(0); 269 } 270 271 #undef __FUNCT__ 272 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 273 int MatTranspose_SeqSBAIJ(Mat A,Mat *B) 274 { 275 int ierr; 276 PetscFunctionBegin; 277 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 278 PetscFunctionReturn(0); 279 } 280 281 #undef __FUNCT__ 282 #define __FUNCT__ "MatView_SeqSBAIJ_Binary" 283 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer) 284 { 285 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 286 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 287 PetscScalar *aa; 288 FILE *file; 289 290 PetscFunctionBegin; 291 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 292 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 293 col_lens[0] = MAT_FILE_COOKIE; 294 295 col_lens[1] = A->m; 296 col_lens[2] = A->m; 297 col_lens[3] = a->s_nz*bs2; 298 299 /* store lengths of each row and write (including header) to file */ 300 count = 0; 301 for (i=0; i<a->mbs; i++) { 302 for (j=0; j<bs; j++) { 303 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 304 } 305 } 306 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 307 ierr = PetscFree(col_lens);CHKERRQ(ierr); 308 309 /* store column indices (zero start index) */ 310 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 311 count = 0; 312 for (i=0; i<a->mbs; i++) { 313 for (j=0; j<bs; j++) { 314 for (k=a->i[i]; k<a->i[i+1]; k++) { 315 for (l=0; l<bs; l++) { 316 jj[count++] = bs*a->j[k] + l; 317 } 318 } 319 } 320 } 321 ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr); 322 ierr = PetscFree(jj);CHKERRQ(ierr); 323 324 /* store nonzero values */ 325 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 326 count = 0; 327 for (i=0; i<a->mbs; i++) { 328 for (j=0; j<bs; j++) { 329 for (k=a->i[i]; k<a->i[i+1]; k++) { 330 for (l=0; l<bs; l++) { 331 aa[count++] = a->a[bs2*k + l*bs + j]; 332 } 333 } 334 } 335 } 336 ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr); 337 ierr = PetscFree(aa);CHKERRQ(ierr); 338 339 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 340 if (file) { 341 fprintf(file,"-matload_block_size %d\n",a->bs); 342 } 343 PetscFunctionReturn(0); 344 } 345 346 #undef __FUNCT__ 347 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 348 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 349 { 350 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 351 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 352 char *name; 353 PetscViewerFormat format; 354 355 PetscFunctionBegin; 356 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 357 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 358 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 359 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 360 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 361 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 362 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 363 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 364 for (i=0; i<a->mbs; i++) { 365 for (j=0; j<bs; j++) { 366 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 367 for (k=a->i[i]; k<a->i[i+1]; k++) { 368 for (l=0; l<bs; l++) { 369 #if defined(PETSC_USE_COMPLEX) 370 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 371 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 372 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 373 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 374 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 375 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 376 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 377 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 378 } 379 #else 380 if (a->a[bs2*k + l*bs + j] != 0.0) { 381 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 382 } 383 #endif 384 } 385 } 386 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 387 } 388 } 389 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 390 } else { 391 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 392 for (i=0; i<a->mbs; i++) { 393 for (j=0; j<bs; j++) { 394 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 395 for (k=a->i[i]; k<a->i[i+1]; k++) { 396 for (l=0; l<bs; l++) { 397 #if defined(PETSC_USE_COMPLEX) 398 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 399 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 400 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 401 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 402 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 403 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 404 } else { 405 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 406 } 407 #else 408 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 409 #endif 410 } 411 } 412 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 413 } 414 } 415 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 416 } 417 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 418 PetscFunctionReturn(0); 419 } 420 421 #undef __FUNCT__ 422 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 423 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 424 { 425 Mat A = (Mat) Aa; 426 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 427 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 428 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 429 MatScalar *aa; 430 MPI_Comm comm; 431 PetscViewer viewer; 432 433 PetscFunctionBegin; 434 /* 435 This is nasty. If this is called from an originally parallel matrix 436 then all processes call this,but only the first has the matrix so the 437 rest should return immediately. 438 */ 439 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 440 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 441 if (rank) PetscFunctionReturn(0); 442 443 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 444 445 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 446 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 447 448 /* loop over matrix elements drawing boxes */ 449 color = PETSC_DRAW_BLUE; 450 for (i=0,row=0; i<mbs; i++,row+=bs) { 451 for (j=a->i[i]; j<a->i[i+1]; j++) { 452 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 453 x_l = a->j[j]*bs; x_r = x_l + 1.0; 454 aa = a->a + j*bs2; 455 for (k=0; k<bs; k++) { 456 for (l=0; l<bs; l++) { 457 if (PetscRealPart(*aa++) >= 0.) continue; 458 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 459 } 460 } 461 } 462 } 463 color = PETSC_DRAW_CYAN; 464 for (i=0,row=0; i<mbs; i++,row+=bs) { 465 for (j=a->i[i]; j<a->i[i+1]; j++) { 466 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 467 x_l = a->j[j]*bs; x_r = x_l + 1.0; 468 aa = a->a + j*bs2; 469 for (k=0; k<bs; k++) { 470 for (l=0; l<bs; l++) { 471 if (PetscRealPart(*aa++) != 0.) continue; 472 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 473 } 474 } 475 } 476 } 477 478 color = PETSC_DRAW_RED; 479 for (i=0,row=0; i<mbs; i++,row+=bs) { 480 for (j=a->i[i]; j<a->i[i+1]; j++) { 481 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 482 x_l = a->j[j]*bs; x_r = x_l + 1.0; 483 aa = a->a + j*bs2; 484 for (k=0; k<bs; k++) { 485 for (l=0; l<bs; l++) { 486 if (PetscRealPart(*aa++) <= 0.) continue; 487 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 488 } 489 } 490 } 491 } 492 PetscFunctionReturn(0); 493 } 494 495 #undef __FUNCT__ 496 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 497 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 498 { 499 int ierr; 500 PetscReal xl,yl,xr,yr,w,h; 501 PetscDraw draw; 502 PetscTruth isnull; 503 504 PetscFunctionBegin; 505 506 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 507 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 508 509 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 510 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 511 xr += w; yr += h; xl = -w; yl = -h; 512 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 513 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 514 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 515 PetscFunctionReturn(0); 516 } 517 518 #undef __FUNCT__ 519 #define __FUNCT__ "MatView_SeqSBAIJ" 520 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 521 { 522 int ierr; 523 PetscTruth issocket,isascii,isbinary,isdraw; 524 525 PetscFunctionBegin; 526 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 527 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 528 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 529 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 530 if (issocket) { 531 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 532 } else if (isascii){ 533 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 534 } else if (isbinary) { 535 ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr); 536 } else if (isdraw) { 537 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 538 } else { 539 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 540 } 541 PetscFunctionReturn(0); 542 } 543 544 545 #undef __FUNCT__ 546 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 547 int MatGetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v) 548 { 549 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 550 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 551 int *ai = a->i,*ailen = a->ilen; 552 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 553 MatScalar *ap,*aa = a->a,zero = 0.0; 554 555 PetscFunctionBegin; 556 for (k=0; k<m; k++) { /* loop over rows */ 557 row = im[k]; brow = row/bs; 558 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 559 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 560 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 561 nrow = ailen[brow]; 562 for (l=0; l<n; l++) { /* loop over columns */ 563 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 564 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 565 col = in[l] ; 566 bcol = col/bs; 567 cidx = col%bs; 568 ridx = row%bs; 569 high = nrow; 570 low = 0; /* assume unsorted */ 571 while (high-low > 5) { 572 t = (low+high)/2; 573 if (rp[t] > bcol) high = t; 574 else low = t; 575 } 576 for (i=low; i<high; i++) { 577 if (rp[i] > bcol) break; 578 if (rp[i] == bcol) { 579 *v++ = ap[bs2*i+bs*cidx+ridx]; 580 goto finished; 581 } 582 } 583 *v++ = zero; 584 finished:; 585 } 586 } 587 PetscFunctionReturn(0); 588 } 589 590 591 #undef __FUNCT__ 592 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 593 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 594 { 595 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 596 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 597 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 598 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 599 PetscTruth roworiented=a->roworiented; 600 MatScalar *value = v,*ap,*aa = a->a,*bap; 601 602 PetscFunctionBegin; 603 if (roworiented) { 604 stepval = (n-1)*bs; 605 } else { 606 stepval = (m-1)*bs; 607 } 608 for (k=0; k<m; k++) { /* loop over added rows */ 609 row = im[k]; 610 if (row < 0) continue; 611 #if defined(PETSC_USE_BOPT_g) 612 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 613 #endif 614 rp = aj + ai[row]; 615 ap = aa + bs2*ai[row]; 616 rmax = imax[row]; 617 nrow = ailen[row]; 618 low = 0; 619 for (l=0; l<n; l++) { /* loop over added columns */ 620 if (in[l] < 0) continue; 621 #if defined(PETSC_USE_BOPT_g) 622 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 623 #endif 624 col = in[l]; 625 if (col < row) continue; /* ignore lower triangular block */ 626 if (roworiented) { 627 value = v + k*(stepval+bs)*bs + l*bs; 628 } else { 629 value = v + l*(stepval+bs)*bs + k*bs; 630 } 631 if (!sorted) low = 0; high = nrow; 632 while (high-low > 7) { 633 t = (low+high)/2; 634 if (rp[t] > col) high = t; 635 else low = t; 636 } 637 for (i=low; i<high; i++) { 638 if (rp[i] > col) break; 639 if (rp[i] == col) { 640 bap = ap + bs2*i; 641 if (roworiented) { 642 if (is == ADD_VALUES) { 643 for (ii=0; ii<bs; ii++,value+=stepval) { 644 for (jj=ii; jj<bs2; jj+=bs) { 645 bap[jj] += *value++; 646 } 647 } 648 } else { 649 for (ii=0; ii<bs; ii++,value+=stepval) { 650 for (jj=ii; jj<bs2; jj+=bs) { 651 bap[jj] = *value++; 652 } 653 } 654 } 655 } else { 656 if (is == ADD_VALUES) { 657 for (ii=0; ii<bs; ii++,value+=stepval) { 658 for (jj=0; jj<bs; jj++) { 659 *bap++ += *value++; 660 } 661 } 662 } else { 663 for (ii=0; ii<bs; ii++,value+=stepval) { 664 for (jj=0; jj<bs; jj++) { 665 *bap++ = *value++; 666 } 667 } 668 } 669 } 670 goto noinsert2; 671 } 672 } 673 if (nonew == 1) goto noinsert2; 674 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 675 if (nrow >= rmax) { 676 /* there is no extra room in row, therefore enlarge */ 677 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 678 MatScalar *new_a; 679 680 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 681 682 /* malloc new storage space */ 683 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 684 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 685 new_j = (int*)(new_a + bs2*new_nz); 686 new_i = new_j + new_nz; 687 688 /* copy over old data into new slots */ 689 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 690 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 691 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 692 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 693 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 694 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 695 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 696 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 697 /* free up old matrix storage */ 698 ierr = PetscFree(a->a);CHKERRQ(ierr); 699 if (!a->singlemalloc) { 700 ierr = PetscFree(a->i);CHKERRQ(ierr); 701 ierr = PetscFree(a->j);CHKERRQ(ierr); 702 } 703 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 704 a->singlemalloc = PETSC_TRUE; 705 706 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 707 rmax = imax[row] = imax[row] + CHUNKSIZE; 708 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 709 a->s_maxnz += bs2*CHUNKSIZE; 710 a->reallocs++; 711 a->s_nz++; 712 } 713 N = nrow++ - 1; 714 /* shift up all the later entries in this row */ 715 for (ii=N; ii>=i; ii--) { 716 rp[ii+1] = rp[ii]; 717 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 718 } 719 if (N >= i) { 720 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 721 } 722 rp[i] = col; 723 bap = ap + bs2*i; 724 if (roworiented) { 725 for (ii=0; ii<bs; ii++,value+=stepval) { 726 for (jj=ii; jj<bs2; jj+=bs) { 727 bap[jj] = *value++; 728 } 729 } 730 } else { 731 for (ii=0; ii<bs; ii++,value+=stepval) { 732 for (jj=0; jj<bs; jj++) { 733 *bap++ = *value++; 734 } 735 } 736 } 737 noinsert2:; 738 low = i; 739 } 740 ailen[row] = nrow; 741 } 742 PetscFunctionReturn(0); 743 } 744 745 #undef __FUNCT__ 746 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 747 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 748 { 749 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 750 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 751 int m = A->m,*ip,N,*ailen = a->ilen; 752 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 753 MatScalar *aa = a->a,*ap; 754 #if defined(PETSC_HAVE_SPOOLES) 755 PetscTruth flag; 756 #endif 757 758 PetscFunctionBegin; 759 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 760 761 if (m) rmax = ailen[0]; 762 for (i=1; i<mbs; i++) { 763 /* move each row back by the amount of empty slots (fshift) before it*/ 764 fshift += imax[i-1] - ailen[i-1]; 765 rmax = PetscMax(rmax,ailen[i]); 766 if (fshift) { 767 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 768 N = ailen[i]; 769 for (j=0; j<N; j++) { 770 ip[j-fshift] = ip[j]; 771 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 772 } 773 } 774 ai[i] = ai[i-1] + ailen[i-1]; 775 } 776 if (mbs) { 777 fshift += imax[mbs-1] - ailen[mbs-1]; 778 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 779 } 780 /* reset ilen and imax for each row */ 781 for (i=0; i<mbs; i++) { 782 ailen[i] = imax[i] = ai[i+1] - ai[i]; 783 } 784 a->s_nz = ai[mbs]; 785 786 /* diagonals may have moved, reset it */ 787 if (a->diag) { 788 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr); 789 } 790 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 791 m,A->m,a->bs,fshift*bs2,a->s_nz*bs2); 792 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n", 793 a->reallocs); 794 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 795 a->reallocs = 0; 796 A->info.nz_unneeded = (PetscReal)fshift*bs2; 797 798 #if defined(PETSC_HAVE_SPOOLES) 799 ierr = PetscOptionsHasName(A->prefix,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr); 800 if (flag) { ierr = MatUseSpooles_SeqSBAIJ(A);CHKERRQ(ierr); } 801 #endif 802 803 PetscFunctionReturn(0); 804 } 805 806 /* 807 This function returns an array of flags which indicate the locations of contiguous 808 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 809 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 810 Assume: sizes should be long enough to hold all the values. 811 */ 812 #undef __FUNCT__ 813 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 814 int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 815 { 816 int i,j,k,row; 817 PetscTruth flg; 818 819 PetscFunctionBegin; 820 for (i=0,j=0; i<n; j++) { 821 row = idx[i]; 822 if (row%bs!=0) { /* Not the begining of a block */ 823 sizes[j] = 1; 824 i++; 825 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 826 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 827 i++; 828 } else { /* Begining of the block, so check if the complete block exists */ 829 flg = PETSC_TRUE; 830 for (k=1; k<bs; k++) { 831 if (row+k != idx[i+k]) { /* break in the block */ 832 flg = PETSC_FALSE; 833 break; 834 } 835 } 836 if (flg == PETSC_TRUE) { /* No break in the bs */ 837 sizes[j] = bs; 838 i+= bs; 839 } else { 840 sizes[j] = 1; 841 i++; 842 } 843 } 844 } 845 *bs_max = j; 846 PetscFunctionReturn(0); 847 } 848 849 #undef __FUNCT__ 850 #define __FUNCT__ "MatZeroRows_SeqSBAIJ" 851 int MatZeroRows_SeqSBAIJ(Mat A,IS is,PetscScalar *diag) 852 { 853 PetscFunctionBegin; 854 SETERRQ(PETSC_ERR_SUP,"No support for this function yet"); 855 } 856 857 /* Only add/insert a(i,j) with i<=j (blocks). 858 Any a(i,j) with i>j input by user is ingored. 859 */ 860 861 #undef __FUNCT__ 862 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 863 int MatSetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 864 { 865 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 866 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 867 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 868 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 869 int ridx,cidx,bs2=a->bs2,ierr; 870 MatScalar *ap,value,*aa=a->a,*bap; 871 872 PetscFunctionBegin; 873 874 for (k=0; k<m; k++) { /* loop over added rows */ 875 row = im[k]; /* row number */ 876 brow = row/bs; /* block row number */ 877 if (row < 0) continue; 878 #if defined(PETSC_USE_BOPT_g) 879 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 880 #endif 881 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 882 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 883 rmax = imax[brow]; /* maximum space allocated for this row */ 884 nrow = ailen[brow]; /* actual length of this row */ 885 low = 0; 886 887 for (l=0; l<n; l++) { /* loop over added columns */ 888 if (in[l] < 0) continue; 889 #if defined(PETSC_USE_BOPT_g) 890 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m); 891 #endif 892 col = in[l]; 893 bcol = col/bs; /* block col number */ 894 895 if (brow <= bcol){ 896 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 897 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 898 /* element value a(k,l) */ 899 if (roworiented) { 900 value = v[l + k*n]; 901 } else { 902 value = v[k + l*m]; 903 } 904 905 /* move pointer bap to a(k,l) quickly and add/insert value */ 906 if (!sorted) low = 0; high = nrow; 907 while (high-low > 7) { 908 t = (low+high)/2; 909 if (rp[t] > bcol) high = t; 910 else low = t; 911 } 912 for (i=low; i<high; i++) { 913 /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */ 914 if (rp[i] > bcol) break; 915 if (rp[i] == bcol) { 916 bap = ap + bs2*i + bs*cidx + ridx; 917 if (is == ADD_VALUES) *bap += value; 918 else *bap = value; 919 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 920 if (brow == bcol && ridx < cidx){ 921 bap = ap + bs2*i + bs*ridx + cidx; 922 if (is == ADD_VALUES) *bap += value; 923 else *bap = value; 924 } 925 goto noinsert1; 926 } 927 } 928 929 if (nonew == 1) goto noinsert1; 930 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 931 if (nrow >= rmax) { 932 /* there is no extra room in row, therefore enlarge */ 933 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 934 MatScalar *new_a; 935 936 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 937 938 /* Malloc new storage space */ 939 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 940 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 941 new_j = (int*)(new_a + bs2*new_nz); 942 new_i = new_j + new_nz; 943 944 /* copy over old data into new slots */ 945 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 946 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 947 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 948 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 949 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 950 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 951 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 952 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 953 /* free up old matrix storage */ 954 ierr = PetscFree(a->a);CHKERRQ(ierr); 955 if (!a->singlemalloc) { 956 ierr = PetscFree(a->i);CHKERRQ(ierr); 957 ierr = PetscFree(a->j);CHKERRQ(ierr); 958 } 959 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 960 a->singlemalloc = PETSC_TRUE; 961 962 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 963 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 964 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 965 a->s_maxnz += bs2*CHUNKSIZE; 966 a->reallocs++; 967 a->s_nz++; 968 } 969 970 N = nrow++ - 1; 971 /* shift up all the later entries in this row */ 972 for (ii=N; ii>=i; ii--) { 973 rp[ii+1] = rp[ii]; 974 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 975 } 976 if (N>=i) { 977 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 978 } 979 rp[i] = bcol; 980 ap[bs2*i + bs*cidx + ridx] = value; 981 noinsert1:; 982 low = i; 983 /* } */ 984 } 985 } /* end of if .. if.. */ 986 } /* end of loop over added columns */ 987 ailen[brow] = nrow; 988 } /* end of loop over added rows */ 989 990 PetscFunctionReturn(0); 991 } 992 993 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,PetscReal,Mat*); 994 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,PetscReal); 995 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS*,int); 996 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 997 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**); 998 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec); 999 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec); 1000 extern int MatScale_SeqSBAIJ(PetscScalar*,Mat); 1001 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 1002 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 1003 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec); 1004 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 1005 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 1006 extern int MatZeroEntries_SeqSBAIJ(Mat); 1007 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec); 1008 1009 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 1010 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 1011 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 1012 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 1013 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 1014 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 1015 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 1016 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 1017 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec); 1018 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec); 1019 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec); 1020 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec); 1021 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec); 1022 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec); 1023 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec); 1024 1025 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 1026 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 1027 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 1028 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 1029 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 1030 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 1031 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 1032 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 1033 1034 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*); 1035 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*); 1036 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*); 1037 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*); 1038 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*); 1039 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*); 1040 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*); 1041 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*); 1042 1043 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 1044 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 1045 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 1046 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 1047 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 1048 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 1049 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 1050 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 1051 1052 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 1053 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 1054 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 1055 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 1056 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 1057 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 1058 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 1059 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 1060 1061 #ifdef HAVE_MatICCFactor 1062 /* modefied from MatILUFactor_SeqSBAIJ, needs further work! */ 1063 #undef __FUNCT__ 1064 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 1065 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,PetscReal fill,int level) 1066 { 1067 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 1068 Mat outA; 1069 int ierr; 1070 PetscTruth row_identity,col_identity; 1071 1072 PetscFunctionBegin; 1073 /* 1074 if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1075 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1076 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1077 if (!row_identity || !col_identity) { 1078 SETERRQ(1,"Row and column permutations must be identity for in-place ICC"); 1079 } 1080 */ 1081 1082 outA = inA; 1083 inA->factor = FACTOR_CHOLESKY; 1084 1085 if (!a->diag) { 1086 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 1087 } 1088 /* 1089 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1090 for ILU(0) factorization with natural ordering 1091 */ 1092 switch (a->bs) { 1093 case 1: 1094 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1095 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1096 PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1097 case 2: 1098 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1099 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1100 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1101 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1102 break; 1103 case 3: 1104 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1105 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1106 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1107 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1108 break; 1109 case 4: 1110 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1111 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1112 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1113 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1114 break; 1115 case 5: 1116 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1117 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1118 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1119 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1120 break; 1121 case 6: 1122 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1123 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1124 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1125 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1126 break; 1127 case 7: 1128 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1129 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1130 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1131 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1132 break; 1133 default: 1134 a->row = row; 1135 a->icol = col; 1136 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1137 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1138 1139 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1140 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1141 PetscLogObjectParent(inA,a->icol); 1142 1143 if (!a->solve_work) { 1144 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1145 PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar)); 1146 } 1147 } 1148 1149 ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr); 1150 1151 PetscFunctionReturn(0); 1152 } 1153 #endif 1154 1155 #undef __FUNCT__ 1156 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1157 int MatPrintHelp_SeqSBAIJ(Mat A) 1158 { 1159 static PetscTruth called = PETSC_FALSE; 1160 MPI_Comm comm = A->comm; 1161 int ierr; 1162 1163 PetscFunctionBegin; 1164 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1165 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1166 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1167 PetscFunctionReturn(0); 1168 } 1169 1170 EXTERN_C_BEGIN 1171 #undef __FUNCT__ 1172 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1173 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices) 1174 { 1175 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1176 int i,nz,n; 1177 1178 PetscFunctionBegin; 1179 nz = baij->s_maxnz; 1180 n = mat->n; 1181 for (i=0; i<nz; i++) { 1182 baij->j[i] = indices[i]; 1183 } 1184 baij->s_nz = nz; 1185 for (i=0; i<n; i++) { 1186 baij->ilen[i] = baij->imax[i]; 1187 } 1188 1189 PetscFunctionReturn(0); 1190 } 1191 EXTERN_C_END 1192 1193 #undef __FUNCT__ 1194 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1195 /*@ 1196 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1197 in the matrix. 1198 1199 Input Parameters: 1200 + mat - the SeqSBAIJ matrix 1201 - indices - the column indices 1202 1203 Level: advanced 1204 1205 Notes: 1206 This can be called if you have precomputed the nonzero structure of the 1207 matrix and want to provide it to the matrix object to improve the performance 1208 of the MatSetValues() operation. 1209 1210 You MUST have set the correct numbers of nonzeros per row in the call to 1211 MatCreateSeqSBAIJ(). 1212 1213 MUST be called before any calls to MatSetValues() 1214 1215 .seealso: MatCreateSeqSBAIJ 1216 @*/ 1217 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices) 1218 { 1219 int ierr,(*f)(Mat,int *); 1220 1221 PetscFunctionBegin; 1222 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1223 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1224 if (f) { 1225 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1226 } else { 1227 SETERRQ(1,"Wrong type of matrix to set column indices"); 1228 } 1229 PetscFunctionReturn(0); 1230 } 1231 1232 #undef __FUNCT__ 1233 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1234 int MatSetUpPreallocation_SeqSBAIJ(Mat A) 1235 { 1236 int ierr; 1237 1238 PetscFunctionBegin; 1239 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1240 PetscFunctionReturn(0); 1241 } 1242 1243 #undef __FUNCT__ 1244 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1245 int MatGetArray_SeqSBAIJ(Mat A,PetscScalar **array) 1246 { 1247 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1248 PetscFunctionBegin; 1249 *array = a->a; 1250 PetscFunctionReturn(0); 1251 } 1252 1253 #undef __FUNCT__ 1254 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1255 int MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar **array) 1256 { 1257 PetscFunctionBegin; 1258 PetscFunctionReturn(0); 1259 } 1260 1261 #include "petscblaslapack.h" 1262 #undef __FUNCT__ 1263 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1264 int MatAXPY_SeqSBAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str) 1265 { 1266 int ierr,one=1; 1267 Mat_SeqSBAIJ *x = (Mat_SeqSBAIJ *)X->data,*y = (Mat_SeqSBAIJ *)Y->data; 1268 1269 PetscFunctionBegin; 1270 if (str == SAME_NONZERO_PATTERN) { 1271 BLaxpy_(&x->s_nz,a,x->a,&one,y->a,&one); 1272 } else { 1273 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1274 } 1275 PetscFunctionReturn(0); 1276 } 1277 1278 /* -------------------------------------------------------------------*/ 1279 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1280 MatGetRow_SeqSBAIJ, 1281 MatRestoreRow_SeqSBAIJ, 1282 MatMult_SeqSBAIJ_N, 1283 MatMultAdd_SeqSBAIJ_N, 1284 MatMultTranspose_SeqSBAIJ, 1285 MatMultTransposeAdd_SeqSBAIJ, 1286 MatSolve_SeqSBAIJ_N, 1287 0, 1288 0, 1289 0, 1290 0, 1291 MatCholeskyFactor_SeqSBAIJ, 1292 MatRelax_SeqSBAIJ, 1293 MatTranspose_SeqSBAIJ, 1294 MatGetInfo_SeqSBAIJ, 1295 MatEqual_SeqSBAIJ, 1296 MatGetDiagonal_SeqSBAIJ, 1297 MatDiagonalScale_SeqSBAIJ, 1298 MatNorm_SeqSBAIJ, 1299 0, 1300 MatAssemblyEnd_SeqSBAIJ, 1301 0, 1302 MatSetOption_SeqSBAIJ, 1303 MatZeroEntries_SeqSBAIJ, 1304 MatZeroRows_SeqSBAIJ, 1305 0, 1306 0, 1307 MatCholeskyFactorSymbolic_SeqSBAIJ, 1308 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1309 MatSetUpPreallocation_SeqSBAIJ, 1310 0, 1311 MatICCFactorSymbolic_SeqSBAIJ, 1312 MatGetArray_SeqSBAIJ, 1313 MatRestoreArray_SeqSBAIJ, 1314 MatDuplicate_SeqSBAIJ, 1315 0, 1316 0, 1317 0, 1318 0, 1319 MatAXPY_SeqSBAIJ, 1320 MatGetSubMatrices_SeqSBAIJ, 1321 MatIncreaseOverlap_SeqSBAIJ, 1322 MatGetValues_SeqSBAIJ, 1323 0, 1324 MatPrintHelp_SeqSBAIJ, 1325 MatScale_SeqSBAIJ, 1326 0, 1327 0, 1328 0, 1329 MatGetBlockSize_SeqSBAIJ, 1330 MatGetRowIJ_SeqSBAIJ, 1331 MatRestoreRowIJ_SeqSBAIJ, 1332 0, 1333 0, 1334 0, 1335 0, 1336 0, 1337 0, 1338 MatSetValuesBlocked_SeqSBAIJ, 1339 MatGetSubMatrix_SeqSBAIJ, 1340 0, 1341 0, 1342 MatGetPetscMaps_Petsc, 1343 0, 1344 0, 1345 0, 1346 0, 1347 0, 1348 0, 1349 MatGetRowMax_SeqSBAIJ}; 1350 1351 EXTERN_C_BEGIN 1352 #undef __FUNCT__ 1353 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1354 int MatStoreValues_SeqSBAIJ(Mat mat) 1355 { 1356 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1357 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1358 int ierr; 1359 1360 PetscFunctionBegin; 1361 if (aij->nonew != 1) { 1362 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1363 } 1364 1365 /* allocate space for values if not already there */ 1366 if (!aij->saved_values) { 1367 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1368 } 1369 1370 /* copy values over */ 1371 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1372 PetscFunctionReturn(0); 1373 } 1374 EXTERN_C_END 1375 1376 EXTERN_C_BEGIN 1377 #undef __FUNCT__ 1378 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1379 int MatRetrieveValues_SeqSBAIJ(Mat mat) 1380 { 1381 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1382 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1383 1384 PetscFunctionBegin; 1385 if (aij->nonew != 1) { 1386 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1387 } 1388 if (!aij->saved_values) { 1389 SETERRQ(1,"Must call MatStoreValues(A);first"); 1390 } 1391 1392 /* copy values over */ 1393 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1394 PetscFunctionReturn(0); 1395 } 1396 EXTERN_C_END 1397 1398 EXTERN_C_BEGIN 1399 #undef __FUNCT__ 1400 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1401 int MatCreate_SeqSBAIJ(Mat B) 1402 { 1403 Mat_SeqSBAIJ *b; 1404 int ierr,size; 1405 1406 PetscFunctionBegin; 1407 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1408 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1409 B->m = B->M = PetscMax(B->m,B->M); 1410 B->n = B->N = PetscMax(B->n,B->N); 1411 1412 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1413 B->data = (void*)b; 1414 ierr = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1415 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1416 B->ops->destroy = MatDestroy_SeqSBAIJ; 1417 B->ops->view = MatView_SeqSBAIJ; 1418 B->factor = 0; 1419 B->lupivotthreshold = 1.0; 1420 B->mapping = 0; 1421 b->row = 0; 1422 b->icol = 0; 1423 b->reallocs = 0; 1424 b->saved_values = 0; 1425 1426 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1427 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1428 1429 b->sorted = PETSC_FALSE; 1430 b->roworiented = PETSC_TRUE; 1431 b->nonew = 0; 1432 b->diag = 0; 1433 b->solve_work = 0; 1434 b->mult_work = 0; 1435 B->spptr = 0; 1436 b->keepzeroedrows = PETSC_FALSE; 1437 1438 b->inew = 0; 1439 b->jnew = 0; 1440 b->anew = 0; 1441 b->a2anew = 0; 1442 b->permute = PETSC_FALSE; 1443 1444 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1445 "MatStoreValues_SeqSBAIJ", 1446 (void*)MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1447 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1448 "MatRetrieveValues_SeqSBAIJ", 1449 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1450 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1451 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1452 (void*)MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1453 PetscFunctionReturn(0); 1454 } 1455 EXTERN_C_END 1456 1457 #undef __FUNCT__ 1458 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1459 /*@C 1460 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1461 compressed row) format. For good matrix assembly performance the 1462 user should preallocate the matrix storage by setting the parameter nz 1463 (or the array nnz). By setting these parameters accurately, performance 1464 during matrix assembly can be increased by more than a factor of 50. 1465 1466 Collective on Mat 1467 1468 Input Parameters: 1469 + A - the symmetric matrix 1470 . bs - size of block 1471 . nz - number of block nonzeros per block row (same for all rows) 1472 - nnz - array containing the number of block nonzeros in the upper triangular plus 1473 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1474 1475 Options Database Keys: 1476 . -mat_no_unroll - uses code that does not unroll the loops in the 1477 block calculations (much slower) 1478 . -mat_block_size - size of the blocks to use 1479 1480 Level: intermediate 1481 1482 Notes: 1483 Specify the preallocated storage with either nz or nnz (not both). 1484 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1485 allocation. For additional details, see the users manual chapter on 1486 matrices. 1487 1488 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1489 @*/ 1490 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1491 { 1492 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1493 int i,len,ierr,mbs,bs2; 1494 PetscTruth flg; 1495 int s_nz; 1496 1497 PetscFunctionBegin; 1498 B->preallocated = PETSC_TRUE; 1499 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1500 mbs = B->m/bs; 1501 bs2 = bs*bs; 1502 1503 if (mbs*bs != B->m) { 1504 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1505 } 1506 1507 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1508 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1509 if (nnz) { 1510 for (i=0; i<mbs; i++) { 1511 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1512 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs); 1513 } 1514 } 1515 1516 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1517 if (!flg) { 1518 switch (bs) { 1519 case 1: 1520 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1521 B->ops->solve = MatSolve_SeqSBAIJ_1; 1522 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_1; 1523 B->ops->mult = MatMult_SeqSBAIJ_1; 1524 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1525 break; 1526 case 2: 1527 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1528 B->ops->solve = MatSolve_SeqSBAIJ_2; 1529 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_2; 1530 B->ops->mult = MatMult_SeqSBAIJ_2; 1531 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1532 break; 1533 case 3: 1534 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1535 B->ops->solve = MatSolve_SeqSBAIJ_3; 1536 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_3; 1537 B->ops->mult = MatMult_SeqSBAIJ_3; 1538 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1539 break; 1540 case 4: 1541 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1542 B->ops->solve = MatSolve_SeqSBAIJ_4; 1543 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_4; 1544 B->ops->mult = MatMult_SeqSBAIJ_4; 1545 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1546 break; 1547 case 5: 1548 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1549 B->ops->solve = MatSolve_SeqSBAIJ_5; 1550 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_5; 1551 B->ops->mult = MatMult_SeqSBAIJ_5; 1552 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1553 break; 1554 case 6: 1555 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1556 B->ops->solve = MatSolve_SeqSBAIJ_6; 1557 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_6; 1558 B->ops->mult = MatMult_SeqSBAIJ_6; 1559 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1560 break; 1561 case 7: 1562 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1563 B->ops->solve = MatSolve_SeqSBAIJ_7; 1564 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_7; 1565 B->ops->mult = MatMult_SeqSBAIJ_7; 1566 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1567 break; 1568 } 1569 } 1570 1571 b->mbs = mbs; 1572 b->nbs = mbs; 1573 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1574 if (!nnz) { 1575 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1576 else if (nz <= 0) nz = 1; 1577 for (i=0; i<mbs; i++) { 1578 b->imax[i] = nz; 1579 } 1580 nz = nz*mbs; /* total nz */ 1581 } else { 1582 nz = 0; 1583 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1584 } 1585 /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1586 s_nz = nz; 1587 1588 /* allocate the matrix space */ 1589 len = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1590 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1591 ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1592 b->j = (int*)(b->a + s_nz*bs2); 1593 ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr); 1594 b->i = b->j + s_nz; 1595 b->singlemalloc = PETSC_TRUE; 1596 1597 /* pointer to beginning of each row */ 1598 b->i[0] = 0; 1599 for (i=1; i<mbs+1; i++) { 1600 b->i[i] = b->i[i-1] + b->imax[i-1]; 1601 } 1602 1603 /* b->ilen will count nonzeros in each block row so far. */ 1604 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1605 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1606 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1607 1608 b->bs = bs; 1609 b->bs2 = bs2; 1610 b->s_nz = 0; 1611 b->s_maxnz = s_nz*bs2; 1612 1613 b->inew = 0; 1614 b->jnew = 0; 1615 b->anew = 0; 1616 b->a2anew = 0; 1617 b->permute = PETSC_FALSE; 1618 PetscFunctionReturn(0); 1619 } 1620 1621 1622 #undef __FUNCT__ 1623 #define __FUNCT__ "MatCreateSeqSBAIJ" 1624 /*@C 1625 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1626 compressed row) format. For good matrix assembly performance the 1627 user should preallocate the matrix storage by setting the parameter nz 1628 (or the array nnz). By setting these parameters accurately, performance 1629 during matrix assembly can be increased by more than a factor of 50. 1630 1631 Collective on MPI_Comm 1632 1633 Input Parameters: 1634 + comm - MPI communicator, set to PETSC_COMM_SELF 1635 . bs - size of block 1636 . m - number of rows, or number of columns 1637 . nz - number of block nonzeros per block row (same for all rows) 1638 - nnz - array containing the number of block nonzeros in the upper triangular plus 1639 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1640 1641 Output Parameter: 1642 . A - the symmetric matrix 1643 1644 Options Database Keys: 1645 . -mat_no_unroll - uses code that does not unroll the loops in the 1646 block calculations (much slower) 1647 . -mat_block_size - size of the blocks to use 1648 1649 Level: intermediate 1650 1651 Notes: 1652 1653 Specify the preallocated storage with either nz or nnz (not both). 1654 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1655 allocation. For additional details, see the users manual chapter on 1656 matrices. 1657 1658 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1659 @*/ 1660 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1661 { 1662 int ierr; 1663 1664 PetscFunctionBegin; 1665 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1666 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1667 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1668 PetscFunctionReturn(0); 1669 } 1670 1671 #undef __FUNCT__ 1672 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1673 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1674 { 1675 Mat C; 1676 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1677 int i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr; 1678 1679 PetscFunctionBegin; 1680 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1681 1682 *B = 0; 1683 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1684 ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr); 1685 c = (Mat_SeqSBAIJ*)C->data; 1686 1687 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1688 C->preallocated = PETSC_TRUE; 1689 C->factor = A->factor; 1690 c->row = 0; 1691 c->icol = 0; 1692 c->saved_values = 0; 1693 c->keepzeroedrows = a->keepzeroedrows; 1694 C->assembled = PETSC_TRUE; 1695 1696 c->bs = a->bs; 1697 c->bs2 = a->bs2; 1698 c->mbs = a->mbs; 1699 c->nbs = a->nbs; 1700 1701 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1702 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1703 for (i=0; i<mbs; i++) { 1704 c->imax[i] = a->imax[i]; 1705 c->ilen[i] = a->ilen[i]; 1706 } 1707 1708 /* allocate the matrix space */ 1709 c->singlemalloc = PETSC_TRUE; 1710 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1711 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1712 c->j = (int*)(c->a + nz*bs2); 1713 c->i = c->j + nz; 1714 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1715 if (mbs > 0) { 1716 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1717 if (cpvalues == MAT_COPY_VALUES) { 1718 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1719 } else { 1720 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1721 } 1722 } 1723 1724 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1725 c->sorted = a->sorted; 1726 c->roworiented = a->roworiented; 1727 c->nonew = a->nonew; 1728 1729 if (a->diag) { 1730 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1731 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1732 for (i=0; i<mbs; i++) { 1733 c->diag[i] = a->diag[i]; 1734 } 1735 } else c->diag = 0; 1736 c->s_nz = a->s_nz; 1737 c->s_maxnz = a->s_maxnz; 1738 c->solve_work = 0; 1739 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1740 c->mult_work = 0; 1741 *B = C; 1742 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1743 PetscFunctionReturn(0); 1744 } 1745 1746 EXTERN_C_BEGIN 1747 #undef __FUNCT__ 1748 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1749 int MatLoad_SeqSBAIJ(PetscViewer viewer,MatType type,Mat *A) 1750 { 1751 Mat_SeqSBAIJ *a; 1752 Mat B; 1753 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1754 int *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1755 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1756 int *masked,nmask,tmp,bs2,ishift; 1757 PetscScalar *aa; 1758 MPI_Comm comm = ((PetscObject)viewer)->comm; 1759 1760 PetscFunctionBegin; 1761 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1762 bs2 = bs*bs; 1763 1764 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1765 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1766 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1767 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1768 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1769 M = header[1]; N = header[2]; nz = header[3]; 1770 1771 if (header[3] < 0) { 1772 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1773 } 1774 1775 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1776 1777 /* 1778 This code adds extra rows to make sure the number of rows is 1779 divisible by the blocksize 1780 */ 1781 mbs = M/bs; 1782 extra_rows = bs - M + bs*(mbs); 1783 if (extra_rows == bs) extra_rows = 0; 1784 else mbs++; 1785 if (extra_rows) { 1786 PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"); 1787 } 1788 1789 /* read in row lengths */ 1790 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1791 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1792 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1793 1794 /* read in column indices */ 1795 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1796 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1797 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1798 1799 /* loop over row lengths determining block row lengths */ 1800 ierr = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr); 1801 ierr = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1802 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1803 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1804 masked = mask + mbs; 1805 rowcount = 0; nzcount = 0; 1806 for (i=0; i<mbs; i++) { 1807 nmask = 0; 1808 for (j=0; j<bs; j++) { 1809 kmax = rowlengths[rowcount]; 1810 for (k=0; k<kmax; k++) { 1811 tmp = jj[nzcount++]/bs; /* block col. index */ 1812 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1813 } 1814 rowcount++; 1815 } 1816 s_browlengths[i] += nmask; 1817 1818 /* zero out the mask elements we set */ 1819 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1820 } 1821 1822 /* create our matrix */ 1823 ierr = MatCreateSeqSBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,s_browlengths,A);CHKERRQ(ierr); 1824 B = *A; 1825 a = (Mat_SeqSBAIJ*)B->data; 1826 1827 /* set matrix "i" values */ 1828 a->i[0] = 0; 1829 for (i=1; i<= mbs; i++) { 1830 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1831 a->ilen[i-1] = s_browlengths[i-1]; 1832 } 1833 a->s_nz = a->i[mbs]; 1834 1835 /* read in nonzero values */ 1836 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1837 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1838 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1839 1840 /* set "a" and "j" values into matrix */ 1841 nzcount = 0; jcount = 0; 1842 for (i=0; i<mbs; i++) { 1843 nzcountb = nzcount; 1844 nmask = 0; 1845 for (j=0; j<bs; j++) { 1846 kmax = rowlengths[i*bs+j]; 1847 for (k=0; k<kmax; k++) { 1848 tmp = jj[nzcount++]/bs; /* block col. index */ 1849 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1850 } 1851 } 1852 /* sort the masked values */ 1853 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1854 1855 /* set "j" values into matrix */ 1856 maskcount = 1; 1857 for (j=0; j<nmask; j++) { 1858 a->j[jcount++] = masked[j]; 1859 mask[masked[j]] = maskcount++; 1860 } 1861 1862 /* set "a" values into matrix */ 1863 ishift = bs2*a->i[i]; 1864 for (j=0; j<bs; j++) { 1865 kmax = rowlengths[i*bs+j]; 1866 for (k=0; k<kmax; k++) { 1867 tmp = jj[nzcountb]/bs ; /* block col. index */ 1868 if (tmp >= i){ 1869 block = mask[tmp] - 1; 1870 point = jj[nzcountb] - bs*tmp; 1871 idx = ishift + bs2*block + j + bs*point; 1872 a->a[idx] = aa[nzcountb]; 1873 } 1874 nzcountb++; 1875 } 1876 } 1877 /* zero out the mask elements we set */ 1878 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1879 } 1880 if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1881 1882 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1883 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1884 ierr = PetscFree(aa);CHKERRQ(ierr); 1885 ierr = PetscFree(jj);CHKERRQ(ierr); 1886 ierr = PetscFree(mask);CHKERRQ(ierr); 1887 1888 B->assembled = PETSC_TRUE; 1889 ierr = MatView_Private(B);CHKERRQ(ierr); 1890 PetscFunctionReturn(0); 1891 } 1892 EXTERN_C_END 1893 1894 #undef __FUNCT__ 1895 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1896 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1897 { 1898 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1899 MatScalar *aa=a->a,*v,*v1; 1900 PetscScalar *x,*b,*t,sum,d; 1901 int m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr; 1902 int nz,nz1,*vj,*vj1,i; 1903 1904 PetscFunctionBegin; 1905 its = its*lits; 1906 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1907 1908 if (bs > 1) 1909 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1910 1911 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1912 if (xx != bb) { 1913 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1914 } else { 1915 b = x; 1916 } 1917 1918 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1919 1920 if (flag & SOR_ZERO_INITIAL_GUESS) { 1921 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1922 for (i=0; i<m; i++) 1923 t[i] = b[i]; 1924 1925 for (i=0; i<m; i++){ 1926 d = *(aa + ai[i]); /* diag[i] */ 1927 v = aa + ai[i] + 1; 1928 vj = aj + ai[i] + 1; 1929 nz = ai[i+1] - ai[i] - 1; 1930 x[i] = omega*t[i]/d; 1931 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 1932 PetscLogFlops(2*nz-1); 1933 } 1934 } 1935 1936 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1937 for (i=0; i<m; i++) 1938 t[i] = b[i]; 1939 1940 for (i=0; i<m-1; i++){ /* update rhs */ 1941 v = aa + ai[i] + 1; 1942 vj = aj + ai[i] + 1; 1943 nz = ai[i+1] - ai[i] - 1; 1944 while (nz--) t[*vj++] -= x[i]*(*v++); 1945 PetscLogFlops(2*nz-1); 1946 } 1947 for (i=m-1; i>=0; i--){ 1948 d = *(aa + ai[i]); 1949 v = aa + ai[i] + 1; 1950 vj = aj + ai[i] + 1; 1951 nz = ai[i+1] - ai[i] - 1; 1952 sum = t[i]; 1953 while (nz--) sum -= x[*vj++]*(*v++); 1954 PetscLogFlops(2*nz-1); 1955 x[i] = (1-omega)*x[i] + omega*sum/d; 1956 } 1957 } 1958 its--; 1959 } 1960 1961 while (its--) { 1962 /* 1963 forward sweep: 1964 for i=0,...,m-1: 1965 sum[i] = (b[i] - U(i,:)x )/d[i]; 1966 x[i] = (1-omega)x[i] + omega*sum[i]; 1967 b = b - x[i]*U^T(i,:); 1968 1969 */ 1970 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1971 for (i=0; i<m; i++) 1972 t[i] = b[i]; 1973 1974 for (i=0; i<m; i++){ 1975 d = *(aa + ai[i]); /* diag[i] */ 1976 v = aa + ai[i] + 1; v1=v; 1977 vj = aj + ai[i] + 1; vj1=vj; 1978 nz = ai[i+1] - ai[i] - 1; nz1=nz; 1979 sum = t[i]; 1980 while (nz1--) sum -= (*v1++)*x[*vj1++]; 1981 x[i] = (1-omega)*x[i] + omega*sum/d; 1982 while (nz--) t[*vj++] -= x[i]*(*v++); 1983 PetscLogFlops(4*nz-2); 1984 } 1985 } 1986 1987 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1988 /* 1989 backward sweep: 1990 b = b - x[i]*U^T(i,:), i=0,...,n-2 1991 for i=m-1,...,0: 1992 sum[i] = (b[i] - U(i,:)x )/d[i]; 1993 x[i] = (1-omega)x[i] + omega*sum[i]; 1994 */ 1995 for (i=0; i<m; i++) 1996 t[i] = b[i]; 1997 1998 for (i=0; i<m-1; i++){ /* update rhs */ 1999 v = aa + ai[i] + 1; 2000 vj = aj + ai[i] + 1; 2001 nz = ai[i+1] - ai[i] - 1; 2002 while (nz--) t[*vj++] -= x[i]*(*v++); 2003 PetscLogFlops(2*nz-1); 2004 } 2005 for (i=m-1; i>=0; i--){ 2006 d = *(aa + ai[i]); 2007 v = aa + ai[i] + 1; 2008 vj = aj + ai[i] + 1; 2009 nz = ai[i+1] - ai[i] - 1; 2010 sum = t[i]; 2011 while (nz--) sum -= x[*vj++]*(*v++); 2012 PetscLogFlops(2*nz-1); 2013 x[i] = (1-omega)*x[i] + omega*sum/d; 2014 } 2015 } 2016 } 2017 2018 ierr = PetscFree(t); CHKERRQ(ierr); 2019 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2020 if (bb != xx) { 2021 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2022 } 2023 2024 PetscFunctionReturn(0); 2025 } 2026 2027 2028 2029 2030 2031 2032