1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/ 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "src/mat/impls/sbaij/seq/sbaij.h" 11 12 #define CHUNKSIZE 10 13 14 /* 15 Checks for missing diagonals 16 */ 17 #undef __FUNCT__ 18 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 19 int MatMissingDiagonal_SeqSBAIJ(Mat A) 20 { 21 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 22 int *diag,*jj = a->j,i,ierr; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for (i=0; i<a->mbs; i++) { 28 if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i); 29 } 30 PetscFunctionReturn(0); 31 } 32 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 35 int MatMarkDiagonal_SeqSBAIJ(Mat A) 36 { 37 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 38 int i,mbs = a->mbs,ierr; 39 40 PetscFunctionBegin; 41 if (a->diag) PetscFunctionReturn(0); 42 43 ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr); 44 PetscLogObjectMemory(A,(mbs+1)*sizeof(int)); 45 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 #undef __FUNCT__ 50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 51 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 52 { 53 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 54 int n = a->mbs,i; 55 56 PetscFunctionBegin; 57 *nn = n; 58 if (!ia) PetscFunctionReturn(0); 59 60 if (oshift == 1) { 61 /* temporarily add 1 to i and j indices */ 62 int s_nz = a->i[n]; 63 for (i=0; i<s_nz; i++) a->j[i]++; 64 for (i=0; i<n+1; i++) a->i[i]++; 65 *ia = a->i; *ja = a->j; 66 } else { 67 *ia = a->i; *ja = a->j; 68 } 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 74 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 75 { 76 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 77 int i,n = a->mbs; 78 79 PetscFunctionBegin; 80 if (!ia) PetscFunctionReturn(0); 81 82 if (oshift == 1) { 83 int s_nz = a->i[n]-1; 84 for (i=0; i<s_nz; i++) a->j[i]--; 85 for (i=0; i<n+1; i++) a->i[i]--; 86 } 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ" 92 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs) 93 { 94 Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data; 95 96 PetscFunctionBegin; 97 *bs = sbaij->bs; 98 PetscFunctionReturn(0); 99 } 100 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 103 int MatDestroy_SeqSBAIJ(Mat A) 104 { 105 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 106 int ierr; 107 108 PetscFunctionBegin; 109 #if defined(PETSC_USE_LOG) 110 PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz); 111 #endif 112 ierr = PetscFree(a->a);CHKERRQ(ierr); 113 if (!a->singlemalloc) { 114 ierr = PetscFree(a->i);CHKERRQ(ierr); 115 ierr = PetscFree(a->j);CHKERRQ(ierr); 116 } 117 if (a->row) { 118 ierr = ISDestroy(a->row);CHKERRQ(ierr); 119 } 120 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 121 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 122 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 123 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 124 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 125 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 126 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 127 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 128 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 129 130 if (a->inew){ 131 ierr = PetscFree(a->inew);CHKERRQ(ierr); 132 a->inew = 0; 133 } 134 ierr = PetscFree(a);CHKERRQ(ierr); 135 PetscFunctionReturn(0); 136 } 137 138 #undef __FUNCT__ 139 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 140 int MatSetOption_SeqSBAIJ(Mat A,MatOption op) 141 { 142 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 143 144 PetscFunctionBegin; 145 switch (op) { 146 case MAT_ROW_ORIENTED: 147 a->roworiented = PETSC_TRUE; 148 break; 149 case MAT_COLUMN_ORIENTED: 150 a->roworiented = PETSC_FALSE; 151 break; 152 case MAT_COLUMNS_SORTED: 153 a->sorted = PETSC_TRUE; 154 break; 155 case MAT_COLUMNS_UNSORTED: 156 a->sorted = PETSC_FALSE; 157 break; 158 case MAT_KEEP_ZEROED_ROWS: 159 a->keepzeroedrows = PETSC_TRUE; 160 break; 161 case MAT_NO_NEW_NONZERO_LOCATIONS: 162 a->nonew = 1; 163 break; 164 case MAT_NEW_NONZERO_LOCATION_ERR: 165 a->nonew = -1; 166 break; 167 case MAT_NEW_NONZERO_ALLOCATION_ERR: 168 a->nonew = -2; 169 break; 170 case MAT_YES_NEW_NONZERO_LOCATIONS: 171 a->nonew = 0; 172 break; 173 case MAT_ROWS_SORTED: 174 case MAT_ROWS_UNSORTED: 175 case MAT_YES_NEW_DIAGONALS: 176 case MAT_IGNORE_OFF_PROC_ENTRIES: 177 case MAT_USE_HASH_TABLE: 178 PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n"); 179 break; 180 case MAT_NO_NEW_DIAGONALS: 181 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 182 default: 183 SETERRQ(PETSC_ERR_SUP,"unknown option"); 184 } 185 PetscFunctionReturn(0); 186 } 187 188 #undef __FUNCT__ 189 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 190 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v) 191 { 192 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 193 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr; 194 MatScalar *aa,*aa_i; 195 PetscScalar *v_i; 196 197 PetscFunctionBegin; 198 bs = a->bs; 199 ai = a->i; 200 aj = a->j; 201 aa = a->a; 202 bs2 = a->bs2; 203 204 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 205 206 bn = row/bs; /* Block number */ 207 bp = row % bs; /* Block position */ 208 M = ai[bn+1] - ai[bn]; 209 *ncols = bs*M; 210 211 if (v) { 212 *v = 0; 213 if (*ncols) { 214 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 215 for (i=0; i<M; i++) { /* for each block in the block row */ 216 v_i = *v + i*bs; 217 aa_i = aa + bs2*(ai[bn] + i); 218 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 219 } 220 } 221 } 222 223 if (cols) { 224 *cols = 0; 225 if (*ncols) { 226 ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr); 227 for (i=0; i<M; i++) { /* for each block in the block row */ 228 cols_i = *cols + i*bs; 229 itmp = bs*aj[ai[bn] + i]; 230 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 231 } 232 } 233 } 234 235 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 236 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 237 #ifdef column_search 238 v_i = *v + M*bs; 239 cols_i = *cols + M*bs; 240 for (i=0; i<bn; i++){ /* for each block row */ 241 M = ai[i+1] - ai[i]; 242 for (j=0; j<M; j++){ 243 itmp = aj[ai[i] + j]; /* block column value */ 244 if (itmp == bn){ 245 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 246 for (k=0; k<bs; k++) { 247 *cols_i++ = i*bs+k; 248 *v_i++ = aa_i[k]; 249 } 250 *ncols += bs; 251 break; 252 } 253 } 254 } 255 #endif 256 257 PetscFunctionReturn(0); 258 } 259 260 #undef __FUNCT__ 261 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 262 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 263 { 264 int ierr; 265 266 PetscFunctionBegin; 267 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 268 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 269 PetscFunctionReturn(0); 270 } 271 272 #undef __FUNCT__ 273 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 274 int MatTranspose_SeqSBAIJ(Mat A,Mat *B) 275 { 276 int ierr; 277 PetscFunctionBegin; 278 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 279 PetscFunctionReturn(0); 280 } 281 282 #undef __FUNCT__ 283 #define __FUNCT__ "MatView_SeqSBAIJ_Binary" 284 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer) 285 { 286 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 287 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 288 PetscScalar *aa; 289 FILE *file; 290 291 PetscFunctionBegin; 292 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 293 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 294 col_lens[0] = MAT_FILE_COOKIE; 295 296 col_lens[1] = A->m; 297 col_lens[2] = A->m; 298 col_lens[3] = a->s_nz*bs2; 299 300 /* store lengths of each row and write (including header) to file */ 301 count = 0; 302 for (i=0; i<a->mbs; i++) { 303 for (j=0; j<bs; j++) { 304 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 305 } 306 } 307 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 308 ierr = PetscFree(col_lens);CHKERRQ(ierr); 309 310 /* store column indices (zero start index) */ 311 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 312 count = 0; 313 for (i=0; i<a->mbs; i++) { 314 for (j=0; j<bs; j++) { 315 for (k=a->i[i]; k<a->i[i+1]; k++) { 316 for (l=0; l<bs; l++) { 317 jj[count++] = bs*a->j[k] + l; 318 } 319 } 320 } 321 } 322 ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr); 323 ierr = PetscFree(jj);CHKERRQ(ierr); 324 325 /* store nonzero values */ 326 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 327 count = 0; 328 for (i=0; i<a->mbs; i++) { 329 for (j=0; j<bs; j++) { 330 for (k=a->i[i]; k<a->i[i+1]; k++) { 331 for (l=0; l<bs; l++) { 332 aa[count++] = a->a[bs2*k + l*bs + j]; 333 } 334 } 335 } 336 } 337 ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr); 338 ierr = PetscFree(aa);CHKERRQ(ierr); 339 340 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 341 if (file) { 342 fprintf(file,"-matload_block_size %d\n",a->bs); 343 } 344 PetscFunctionReturn(0); 345 } 346 347 #undef __FUNCT__ 348 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 349 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 350 { 351 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 352 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 353 char *name; 354 PetscViewerFormat format; 355 356 PetscFunctionBegin; 357 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 358 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 359 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 360 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 361 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 362 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 363 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 364 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 365 for (i=0; i<a->mbs; i++) { 366 for (j=0; j<bs; j++) { 367 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 368 for (k=a->i[i]; k<a->i[i+1]; k++) { 369 for (l=0; l<bs; l++) { 370 #if defined(PETSC_USE_COMPLEX) 371 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 372 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 373 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 374 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 375 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 376 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 377 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 378 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 379 } 380 #else 381 if (a->a[bs2*k + l*bs + j] != 0.0) { 382 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 383 } 384 #endif 385 } 386 } 387 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 388 } 389 } 390 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 391 } else { 392 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 393 for (i=0; i<a->mbs; i++) { 394 for (j=0; j<bs; j++) { 395 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 396 for (k=a->i[i]; k<a->i[i+1]; k++) { 397 for (l=0; l<bs; l++) { 398 #if defined(PETSC_USE_COMPLEX) 399 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 400 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 401 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 402 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 403 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 404 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 405 } else { 406 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 407 } 408 #else 409 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 410 #endif 411 } 412 } 413 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 414 } 415 } 416 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 417 } 418 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 419 PetscFunctionReturn(0); 420 } 421 422 #undef __FUNCT__ 423 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 424 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 425 { 426 Mat A = (Mat) Aa; 427 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 428 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 429 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 430 MatScalar *aa; 431 MPI_Comm comm; 432 PetscViewer viewer; 433 434 PetscFunctionBegin; 435 /* 436 This is nasty. If this is called from an originally parallel matrix 437 then all processes call this,but only the first has the matrix so the 438 rest should return immediately. 439 */ 440 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 441 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 442 if (rank) PetscFunctionReturn(0); 443 444 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 445 446 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 447 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 448 449 /* loop over matrix elements drawing boxes */ 450 color = PETSC_DRAW_BLUE; 451 for (i=0,row=0; i<mbs; i++,row+=bs) { 452 for (j=a->i[i]; j<a->i[i+1]; j++) { 453 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 454 x_l = a->j[j]*bs; x_r = x_l + 1.0; 455 aa = a->a + j*bs2; 456 for (k=0; k<bs; k++) { 457 for (l=0; l<bs; l++) { 458 if (PetscRealPart(*aa++) >= 0.) continue; 459 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 460 } 461 } 462 } 463 } 464 color = PETSC_DRAW_CYAN; 465 for (i=0,row=0; i<mbs; i++,row+=bs) { 466 for (j=a->i[i]; j<a->i[i+1]; j++) { 467 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 468 x_l = a->j[j]*bs; x_r = x_l + 1.0; 469 aa = a->a + j*bs2; 470 for (k=0; k<bs; k++) { 471 for (l=0; l<bs; l++) { 472 if (PetscRealPart(*aa++) != 0.) continue; 473 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 474 } 475 } 476 } 477 } 478 479 color = PETSC_DRAW_RED; 480 for (i=0,row=0; i<mbs; i++,row+=bs) { 481 for (j=a->i[i]; j<a->i[i+1]; j++) { 482 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 483 x_l = a->j[j]*bs; x_r = x_l + 1.0; 484 aa = a->a + j*bs2; 485 for (k=0; k<bs; k++) { 486 for (l=0; l<bs; l++) { 487 if (PetscRealPart(*aa++) <= 0.) continue; 488 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 489 } 490 } 491 } 492 } 493 PetscFunctionReturn(0); 494 } 495 496 #undef __FUNCT__ 497 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 498 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 499 { 500 int ierr; 501 PetscReal xl,yl,xr,yr,w,h; 502 PetscDraw draw; 503 PetscTruth isnull; 504 505 PetscFunctionBegin; 506 507 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 508 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 509 510 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 511 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 512 xr += w; yr += h; xl = -w; yl = -h; 513 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 514 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 515 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 516 PetscFunctionReturn(0); 517 } 518 519 #undef __FUNCT__ 520 #define __FUNCT__ "MatView_SeqSBAIJ" 521 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 522 { 523 int ierr; 524 PetscTruth issocket,isascii,isbinary,isdraw; 525 526 PetscFunctionBegin; 527 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 528 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 529 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 530 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 531 if (issocket) { 532 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 533 } else if (isascii){ 534 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 535 } else if (isbinary) { 536 ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr); 537 } else if (isdraw) { 538 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 539 } else { 540 SETERRQ1(1,"Viewer type %s not supported by SeqSBAIJ matrices",((PetscObject)viewer)->type_name); 541 } 542 PetscFunctionReturn(0); 543 } 544 545 546 #undef __FUNCT__ 547 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 548 int MatGetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[]) 549 { 550 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 551 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 552 int *ai = a->i,*ailen = a->ilen; 553 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 554 MatScalar *ap,*aa = a->a,zero = 0.0; 555 556 PetscFunctionBegin; 557 for (k=0; k<m; k++) { /* loop over rows */ 558 row = im[k]; brow = row/bs; 559 if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row); 560 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 561 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 562 nrow = ailen[brow]; 563 for (l=0; l<n; l++) { /* loop over columns */ 564 if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",in[l]); 565 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1); 566 col = in[l] ; 567 bcol = col/bs; 568 cidx = col%bs; 569 ridx = row%bs; 570 high = nrow; 571 low = 0; /* assume unsorted */ 572 while (high-low > 5) { 573 t = (low+high)/2; 574 if (rp[t] > bcol) high = t; 575 else low = t; 576 } 577 for (i=low; i<high; i++) { 578 if (rp[i] > bcol) break; 579 if (rp[i] == bcol) { 580 *v++ = ap[bs2*i+bs*cidx+ridx]; 581 goto finished; 582 } 583 } 584 *v++ = zero; 585 finished:; 586 } 587 } 588 PetscFunctionReturn(0); 589 } 590 591 592 #undef __FUNCT__ 593 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 594 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 595 { 596 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 597 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 598 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 599 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 600 PetscTruth roworiented=a->roworiented; 601 const MatScalar *value = v; 602 MatScalar *ap,*aa = a->a,*bap; 603 604 PetscFunctionBegin; 605 if (roworiented) { 606 stepval = (n-1)*bs; 607 } else { 608 stepval = (m-1)*bs; 609 } 610 for (k=0; k<m; k++) { /* loop over added rows */ 611 row = im[k]; 612 if (row < 0) continue; 613 #if defined(PETSC_USE_BOPT_g) 614 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1); 615 #endif 616 rp = aj + ai[row]; 617 ap = aa + bs2*ai[row]; 618 rmax = imax[row]; 619 nrow = ailen[row]; 620 low = 0; 621 for (l=0; l<n; l++) { /* loop over added columns */ 622 if (in[l] < 0) continue; 623 #if defined(PETSC_USE_BOPT_g) 624 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",col,a->nbs-1); 625 #endif 626 col = in[l]; 627 if (col < row) continue; /* ignore lower triangular block */ 628 if (roworiented) { 629 value = v + k*(stepval+bs)*bs + l*bs; 630 } else { 631 value = v + l*(stepval+bs)*bs + k*bs; 632 } 633 if (!sorted) low = 0; high = nrow; 634 while (high-low > 7) { 635 t = (low+high)/2; 636 if (rp[t] > col) high = t; 637 else low = t; 638 } 639 for (i=low; i<high; i++) { 640 if (rp[i] > col) break; 641 if (rp[i] == col) { 642 bap = ap + bs2*i; 643 if (roworiented) { 644 if (is == ADD_VALUES) { 645 for (ii=0; ii<bs; ii++,value+=stepval) { 646 for (jj=ii; jj<bs2; jj+=bs) { 647 bap[jj] += *value++; 648 } 649 } 650 } else { 651 for (ii=0; ii<bs; ii++,value+=stepval) { 652 for (jj=ii; jj<bs2; jj+=bs) { 653 bap[jj] = *value++; 654 } 655 } 656 } 657 } else { 658 if (is == ADD_VALUES) { 659 for (ii=0; ii<bs; ii++,value+=stepval) { 660 for (jj=0; jj<bs; jj++) { 661 *bap++ += *value++; 662 } 663 } 664 } else { 665 for (ii=0; ii<bs; ii++,value+=stepval) { 666 for (jj=0; jj<bs; jj++) { 667 *bap++ = *value++; 668 } 669 } 670 } 671 } 672 goto noinsert2; 673 } 674 } 675 if (nonew == 1) goto noinsert2; 676 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 677 if (nrow >= rmax) { 678 /* there is no extra room in row, therefore enlarge */ 679 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 680 MatScalar *new_a; 681 682 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 683 684 /* malloc new storage space */ 685 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 686 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 687 new_j = (int*)(new_a + bs2*new_nz); 688 new_i = new_j + new_nz; 689 690 /* copy over old data into new slots */ 691 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 692 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 693 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 694 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 695 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 696 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 697 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 698 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 699 /* free up old matrix storage */ 700 ierr = PetscFree(a->a);CHKERRQ(ierr); 701 if (!a->singlemalloc) { 702 ierr = PetscFree(a->i);CHKERRQ(ierr); 703 ierr = PetscFree(a->j);CHKERRQ(ierr); 704 } 705 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 706 a->singlemalloc = PETSC_TRUE; 707 708 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 709 rmax = imax[row] = imax[row] + CHUNKSIZE; 710 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 711 a->s_maxnz += bs2*CHUNKSIZE; 712 a->reallocs++; 713 a->s_nz++; 714 } 715 N = nrow++ - 1; 716 /* shift up all the later entries in this row */ 717 for (ii=N; ii>=i; ii--) { 718 rp[ii+1] = rp[ii]; 719 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 720 } 721 if (N >= i) { 722 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 723 } 724 rp[i] = col; 725 bap = ap + bs2*i; 726 if (roworiented) { 727 for (ii=0; ii<bs; ii++,value+=stepval) { 728 for (jj=ii; jj<bs2; jj+=bs) { 729 bap[jj] = *value++; 730 } 731 } 732 } else { 733 for (ii=0; ii<bs; ii++,value+=stepval) { 734 for (jj=0; jj<bs; jj++) { 735 *bap++ = *value++; 736 } 737 } 738 } 739 noinsert2:; 740 low = i; 741 } 742 ailen[row] = nrow; 743 } 744 PetscFunctionReturn(0); 745 } 746 747 #undef __FUNCT__ 748 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 749 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 750 { 751 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 752 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 753 int m = A->m,*ip,N,*ailen = a->ilen; 754 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 755 MatScalar *aa = a->a,*ap; 756 #if defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_MUMPS) 757 PetscTruth flag; 758 #endif 759 760 PetscFunctionBegin; 761 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 762 763 if (m) rmax = ailen[0]; 764 for (i=1; i<mbs; i++) { 765 /* move each row back by the amount of empty slots (fshift) before it*/ 766 fshift += imax[i-1] - ailen[i-1]; 767 rmax = PetscMax(rmax,ailen[i]); 768 if (fshift) { 769 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 770 N = ailen[i]; 771 for (j=0; j<N; j++) { 772 ip[j-fshift] = ip[j]; 773 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 774 } 775 } 776 ai[i] = ai[i-1] + ailen[i-1]; 777 } 778 if (mbs) { 779 fshift += imax[mbs-1] - ailen[mbs-1]; 780 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 781 } 782 /* reset ilen and imax for each row */ 783 for (i=0; i<mbs; i++) { 784 ailen[i] = imax[i] = ai[i+1] - ai[i]; 785 } 786 a->s_nz = ai[mbs]; 787 788 /* diagonals may have moved, reset it */ 789 if (a->diag) { 790 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr); 791 } 792 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 793 m,A->m,a->bs,fshift*bs2,a->s_nz*bs2); 794 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n", 795 a->reallocs); 796 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 797 a->reallocs = 0; 798 A->info.nz_unneeded = (PetscReal)fshift*bs2; 799 800 #if defined(PETSC_HAVE_SPOOLES) 801 ierr = PetscOptionsHasName(A->prefix,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr); 802 if (flag) { ierr = MatUseSpooles_SeqSBAIJ(A);CHKERRQ(ierr); } 803 #endif 804 #if defined(PETSC_HAVE_MUMPS) 805 ierr = PetscOptionsHasName(A->prefix,"-mat_sbaij_mumps",&flag);CHKERRQ(ierr); 806 if (flag) { ierr = MatUseMUMPS_MPIAIJ(A);CHKERRQ(ierr); } 807 #endif 808 809 PetscFunctionReturn(0); 810 } 811 812 /* 813 This function returns an array of flags which indicate the locations of contiguous 814 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 815 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 816 Assume: sizes should be long enough to hold all the values. 817 */ 818 #undef __FUNCT__ 819 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 820 int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 821 { 822 int i,j,k,row; 823 PetscTruth flg; 824 825 PetscFunctionBegin; 826 for (i=0,j=0; i<n; j++) { 827 row = idx[i]; 828 if (row%bs!=0) { /* Not the begining of a block */ 829 sizes[j] = 1; 830 i++; 831 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 832 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 833 i++; 834 } else { /* Begining of the block, so check if the complete block exists */ 835 flg = PETSC_TRUE; 836 for (k=1; k<bs; k++) { 837 if (row+k != idx[i+k]) { /* break in the block */ 838 flg = PETSC_FALSE; 839 break; 840 } 841 } 842 if (flg == PETSC_TRUE) { /* No break in the bs */ 843 sizes[j] = bs; 844 i+= bs; 845 } else { 846 sizes[j] = 1; 847 i++; 848 } 849 } 850 } 851 *bs_max = j; 852 PetscFunctionReturn(0); 853 } 854 855 #undef __FUNCT__ 856 #define __FUNCT__ "MatZeroRows_SeqSBAIJ" 857 int MatZeroRows_SeqSBAIJ(Mat A,IS is,PetscScalar *diag) 858 { 859 PetscFunctionBegin; 860 SETERRQ(PETSC_ERR_SUP,"No support for this function yet"); 861 } 862 863 /* Only add/insert a(i,j) with i<=j (blocks). 864 Any a(i,j) with i>j input by user is ingored. 865 */ 866 867 #undef __FUNCT__ 868 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 869 int MatSetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 870 { 871 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 872 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 873 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 874 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 875 int ridx,cidx,bs2=a->bs2,ierr; 876 MatScalar *ap,value,*aa=a->a,*bap; 877 878 PetscFunctionBegin; 879 880 for (k=0; k<m; k++) { /* loop over added rows */ 881 row = im[k]; /* row number */ 882 brow = row/bs; /* block row number */ 883 if (row < 0) continue; 884 #if defined(PETSC_USE_BOPT_g) 885 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 886 #endif 887 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 888 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 889 rmax = imax[brow]; /* maximum space allocated for this row */ 890 nrow = ailen[brow]; /* actual length of this row */ 891 low = 0; 892 893 for (l=0; l<n; l++) { /* loop over added columns */ 894 if (in[l] < 0) continue; 895 #if defined(PETSC_USE_BOPT_g) 896 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m-1); 897 #endif 898 col = in[l]; 899 bcol = col/bs; /* block col number */ 900 901 if (brow <= bcol){ 902 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 903 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 904 /* element value a(k,l) */ 905 if (roworiented) { 906 value = v[l + k*n]; 907 } else { 908 value = v[k + l*m]; 909 } 910 911 /* move pointer bap to a(k,l) quickly and add/insert value */ 912 if (!sorted) low = 0; high = nrow; 913 while (high-low > 7) { 914 t = (low+high)/2; 915 if (rp[t] > bcol) high = t; 916 else low = t; 917 } 918 for (i=low; i<high; i++) { 919 /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */ 920 if (rp[i] > bcol) break; 921 if (rp[i] == bcol) { 922 bap = ap + bs2*i + bs*cidx + ridx; 923 if (is == ADD_VALUES) *bap += value; 924 else *bap = value; 925 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 926 if (brow == bcol && ridx < cidx){ 927 bap = ap + bs2*i + bs*ridx + cidx; 928 if (is == ADD_VALUES) *bap += value; 929 else *bap = value; 930 } 931 goto noinsert1; 932 } 933 } 934 935 if (nonew == 1) goto noinsert1; 936 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 937 if (nrow >= rmax) { 938 /* there is no extra room in row, therefore enlarge */ 939 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 940 MatScalar *new_a; 941 942 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 943 944 /* Malloc new storage space */ 945 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 946 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 947 new_j = (int*)(new_a + bs2*new_nz); 948 new_i = new_j + new_nz; 949 950 /* copy over old data into new slots */ 951 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 952 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 953 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 954 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 955 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 956 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 957 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 958 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 959 /* free up old matrix storage */ 960 ierr = PetscFree(a->a);CHKERRQ(ierr); 961 if (!a->singlemalloc) { 962 ierr = PetscFree(a->i);CHKERRQ(ierr); 963 ierr = PetscFree(a->j);CHKERRQ(ierr); 964 } 965 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 966 a->singlemalloc = PETSC_TRUE; 967 968 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 969 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 970 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 971 a->s_maxnz += bs2*CHUNKSIZE; 972 a->reallocs++; 973 a->s_nz++; 974 } 975 976 N = nrow++ - 1; 977 /* shift up all the later entries in this row */ 978 for (ii=N; ii>=i; ii--) { 979 rp[ii+1] = rp[ii]; 980 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 981 } 982 if (N>=i) { 983 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 984 } 985 rp[i] = bcol; 986 ap[bs2*i + bs*cidx + ridx] = value; 987 noinsert1:; 988 low = i; 989 /* } */ 990 } 991 } /* end of if .. if.. */ 992 } /* end of loop over added columns */ 993 ailen[brow] = nrow; 994 } /* end of loop over added rows */ 995 996 PetscFunctionReturn(0); 997 } 998 999 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*); 1000 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*); 1001 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS*,int); 1002 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 1003 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**); 1004 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec); 1005 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec); 1006 extern int MatScale_SeqSBAIJ(PetscScalar*,Mat); 1007 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 1008 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 1009 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec); 1010 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 1011 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 1012 extern int MatZeroEntries_SeqSBAIJ(Mat); 1013 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec); 1014 1015 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 1016 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 1017 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 1018 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 1019 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 1020 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 1021 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 1022 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 1023 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec); 1024 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec); 1025 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec); 1026 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec); 1027 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec); 1028 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec); 1029 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec); 1030 1031 extern int MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs); 1032 1033 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 1034 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 1035 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 1036 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 1037 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 1038 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 1039 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 1040 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 1041 1042 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*); 1043 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*); 1044 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*); 1045 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*); 1046 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*); 1047 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*); 1048 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*); 1049 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*); 1050 extern int MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*); 1051 1052 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 1053 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 1054 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 1055 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 1056 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 1057 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 1058 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 1059 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 1060 1061 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 1062 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 1063 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 1064 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 1065 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 1066 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 1067 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 1068 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 1069 1070 #ifdef HAVE_MatICCFactor 1071 /* modefied from MatILUFactor_SeqSBAIJ, needs further work! */ 1072 #undef __FUNCT__ 1073 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 1074 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 1075 { 1076 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 1077 Mat outA; 1078 int ierr; 1079 PetscTruth row_identity,col_identity; 1080 1081 PetscFunctionBegin; 1082 /* 1083 if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1084 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1085 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1086 if (!row_identity || !col_identity) { 1087 SETERRQ(1,"Row and column permutations must be identity for in-place ICC"); 1088 } 1089 */ 1090 1091 outA = inA; 1092 inA->factor = FACTOR_CHOLESKY; 1093 1094 if (!a->diag) { 1095 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 1096 } 1097 /* 1098 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1099 for ILU(0) factorization with natural ordering 1100 */ 1101 switch (a->bs) { 1102 case 1: 1103 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1104 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1105 inA->ops->solves = MatSolves_SeqSBAIJ_1; 1106 PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1107 case 2: 1108 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1109 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1110 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1111 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1112 break; 1113 case 3: 1114 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1115 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1116 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1117 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1118 break; 1119 case 4: 1120 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1121 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1122 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1123 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1124 break; 1125 case 5: 1126 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1127 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1128 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1129 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1130 break; 1131 case 6: 1132 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1133 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1134 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1135 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1136 break; 1137 case 7: 1138 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1139 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1140 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1141 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1142 break; 1143 default: 1144 a->row = row; 1145 a->icol = col; 1146 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1147 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1148 1149 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1150 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1151 PetscLogObjectParent(inA,a->icol); 1152 1153 if (!a->solve_work) { 1154 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1155 PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar)); 1156 } 1157 } 1158 1159 ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr); 1160 1161 PetscFunctionReturn(0); 1162 } 1163 #endif 1164 1165 #undef __FUNCT__ 1166 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1167 int MatPrintHelp_SeqSBAIJ(Mat A) 1168 { 1169 static PetscTruth called = PETSC_FALSE; 1170 MPI_Comm comm = A->comm; 1171 int ierr; 1172 1173 PetscFunctionBegin; 1174 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1175 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1176 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1177 PetscFunctionReturn(0); 1178 } 1179 1180 EXTERN_C_BEGIN 1181 #undef __FUNCT__ 1182 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1183 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices) 1184 { 1185 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1186 int i,nz,n; 1187 1188 PetscFunctionBegin; 1189 nz = baij->s_maxnz; 1190 n = mat->n; 1191 for (i=0; i<nz; i++) { 1192 baij->j[i] = indices[i]; 1193 } 1194 baij->s_nz = nz; 1195 for (i=0; i<n; i++) { 1196 baij->ilen[i] = baij->imax[i]; 1197 } 1198 1199 PetscFunctionReturn(0); 1200 } 1201 EXTERN_C_END 1202 1203 #undef __FUNCT__ 1204 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1205 /*@ 1206 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1207 in the matrix. 1208 1209 Input Parameters: 1210 + mat - the SeqSBAIJ matrix 1211 - indices - the column indices 1212 1213 Level: advanced 1214 1215 Notes: 1216 This can be called if you have precomputed the nonzero structure of the 1217 matrix and want to provide it to the matrix object to improve the performance 1218 of the MatSetValues() operation. 1219 1220 You MUST have set the correct numbers of nonzeros per row in the call to 1221 MatCreateSeqSBAIJ(). 1222 1223 MUST be called before any calls to MatSetValues() 1224 1225 .seealso: MatCreateSeqSBAIJ 1226 @*/ 1227 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices) 1228 { 1229 int ierr,(*f)(Mat,int *); 1230 1231 PetscFunctionBegin; 1232 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1233 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1234 if (f) { 1235 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1236 } else { 1237 SETERRQ(1,"Wrong type of matrix to set column indices"); 1238 } 1239 PetscFunctionReturn(0); 1240 } 1241 1242 #undef __FUNCT__ 1243 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1244 int MatSetUpPreallocation_SeqSBAIJ(Mat A) 1245 { 1246 int ierr; 1247 1248 PetscFunctionBegin; 1249 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1250 PetscFunctionReturn(0); 1251 } 1252 1253 #undef __FUNCT__ 1254 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1255 int MatGetArray_SeqSBAIJ(Mat A,PetscScalar **array) 1256 { 1257 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1258 PetscFunctionBegin; 1259 *array = a->a; 1260 PetscFunctionReturn(0); 1261 } 1262 1263 #undef __FUNCT__ 1264 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1265 int MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar **array) 1266 { 1267 PetscFunctionBegin; 1268 PetscFunctionReturn(0); 1269 } 1270 1271 #include "petscblaslapack.h" 1272 #undef __FUNCT__ 1273 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1274 int MatAXPY_SeqSBAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str) 1275 { 1276 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1277 int ierr,one=1,i,bs=y->bs,bs2,j; 1278 1279 PetscFunctionBegin; 1280 if (str == SAME_NONZERO_PATTERN) { 1281 BLaxpy_(&x->s_nz,a,x->a,&one,y->a,&one); 1282 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1283 if (y->xtoy && y->XtoY != X) { 1284 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1285 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1286 } 1287 if (!y->xtoy) { /* get xtoy */ 1288 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1289 y->XtoY = X; 1290 } 1291 bs2 = bs*bs; 1292 for (i=0; i<x->s_nz; i++) { 1293 j = 0; 1294 while (j < bs2){ 1295 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1296 j++; 1297 } 1298 } 1299 PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %d/%d = %g\n",bs2*x->s_nz,bs2*y->s_nz,(PetscReal)(bs2*x->s_nz)/(bs2*y->s_nz)); 1300 } else { 1301 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1302 } 1303 PetscFunctionReturn(0); 1304 } 1305 1306 /* -------------------------------------------------------------------*/ 1307 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1308 MatGetRow_SeqSBAIJ, 1309 MatRestoreRow_SeqSBAIJ, 1310 MatMult_SeqSBAIJ_N, 1311 MatMultAdd_SeqSBAIJ_N, 1312 MatMultTranspose_SeqSBAIJ, 1313 MatMultTransposeAdd_SeqSBAIJ, 1314 MatSolve_SeqSBAIJ_N, 1315 0, 1316 0, 1317 0, 1318 0, 1319 MatCholeskyFactor_SeqSBAIJ, 1320 MatRelax_SeqSBAIJ, 1321 MatTranspose_SeqSBAIJ, 1322 MatGetInfo_SeqSBAIJ, 1323 MatEqual_SeqSBAIJ, 1324 MatGetDiagonal_SeqSBAIJ, 1325 MatDiagonalScale_SeqSBAIJ, 1326 MatNorm_SeqSBAIJ, 1327 0, 1328 MatAssemblyEnd_SeqSBAIJ, 1329 0, 1330 MatSetOption_SeqSBAIJ, 1331 MatZeroEntries_SeqSBAIJ, 1332 MatZeroRows_SeqSBAIJ, 1333 0, 1334 0, 1335 MatCholeskyFactorSymbolic_SeqSBAIJ, 1336 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1337 MatSetUpPreallocation_SeqSBAIJ, 1338 0, 1339 MatICCFactorSymbolic_SeqSBAIJ, 1340 MatGetArray_SeqSBAIJ, 1341 MatRestoreArray_SeqSBAIJ, 1342 MatDuplicate_SeqSBAIJ, 1343 0, 1344 0, 1345 0, 1346 0, 1347 MatAXPY_SeqSBAIJ, 1348 MatGetSubMatrices_SeqSBAIJ, 1349 MatIncreaseOverlap_SeqSBAIJ, 1350 MatGetValues_SeqSBAIJ, 1351 0, 1352 MatPrintHelp_SeqSBAIJ, 1353 MatScale_SeqSBAIJ, 1354 0, 1355 0, 1356 0, 1357 MatGetBlockSize_SeqSBAIJ, 1358 MatGetRowIJ_SeqSBAIJ, 1359 MatRestoreRowIJ_SeqSBAIJ, 1360 0, 1361 0, 1362 0, 1363 0, 1364 0, 1365 0, 1366 MatSetValuesBlocked_SeqSBAIJ, 1367 MatGetSubMatrix_SeqSBAIJ, 1368 0, 1369 0, 1370 MatGetPetscMaps_Petsc, 1371 0, 1372 0, 1373 0, 1374 0, 1375 0, 1376 0, 1377 MatGetRowMax_SeqSBAIJ, 1378 0, 1379 0, 1380 0, 1381 0, 1382 0, 1383 0, 1384 0, 1385 0, 1386 0, 1387 0, 1388 0, 1389 0, 1390 0, 1391 #if !defined(PETSC_USE_COMPLEX) 1392 MatGetInertia_SeqSBAIJ 1393 #else 1394 0 1395 #endif 1396 }; 1397 1398 EXTERN_C_BEGIN 1399 #undef __FUNCT__ 1400 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1401 int MatStoreValues_SeqSBAIJ(Mat mat) 1402 { 1403 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1404 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1405 int ierr; 1406 1407 PetscFunctionBegin; 1408 if (aij->nonew != 1) { 1409 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1410 } 1411 1412 /* allocate space for values if not already there */ 1413 if (!aij->saved_values) { 1414 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1415 } 1416 1417 /* copy values over */ 1418 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1419 PetscFunctionReturn(0); 1420 } 1421 EXTERN_C_END 1422 1423 EXTERN_C_BEGIN 1424 #undef __FUNCT__ 1425 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1426 int MatRetrieveValues_SeqSBAIJ(Mat mat) 1427 { 1428 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1429 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1430 1431 PetscFunctionBegin; 1432 if (aij->nonew != 1) { 1433 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1434 } 1435 if (!aij->saved_values) { 1436 SETERRQ(1,"Must call MatStoreValues(A);first"); 1437 } 1438 1439 /* copy values over */ 1440 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1441 PetscFunctionReturn(0); 1442 } 1443 EXTERN_C_END 1444 1445 EXTERN_C_BEGIN 1446 #undef __FUNCT__ 1447 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1448 int MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,int bs,int nz,int *nnz) 1449 { 1450 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1451 int i,len,ierr,mbs,bs2; 1452 PetscTruth flg; 1453 int s_nz; 1454 1455 PetscFunctionBegin; 1456 B->preallocated = PETSC_TRUE; 1457 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1458 mbs = B->m/bs; 1459 bs2 = bs*bs; 1460 1461 if (mbs*bs != B->m) { 1462 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1463 } 1464 1465 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1466 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1467 if (nnz) { 1468 for (i=0; i<mbs; i++) { 1469 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1470 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs); 1471 } 1472 } 1473 1474 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1475 if (!flg) { 1476 switch (bs) { 1477 case 1: 1478 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1479 B->ops->solve = MatSolve_SeqSBAIJ_1; 1480 B->ops->solves = MatSolves_SeqSBAIJ_1; 1481 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_1; 1482 B->ops->mult = MatMult_SeqSBAIJ_1; 1483 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1484 break; 1485 case 2: 1486 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1487 B->ops->solve = MatSolve_SeqSBAIJ_2; 1488 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_2; 1489 B->ops->mult = MatMult_SeqSBAIJ_2; 1490 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1491 break; 1492 case 3: 1493 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1494 B->ops->solve = MatSolve_SeqSBAIJ_3; 1495 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_3; 1496 B->ops->mult = MatMult_SeqSBAIJ_3; 1497 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1498 break; 1499 case 4: 1500 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1501 B->ops->solve = MatSolve_SeqSBAIJ_4; 1502 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_4; 1503 B->ops->mult = MatMult_SeqSBAIJ_4; 1504 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1505 break; 1506 case 5: 1507 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1508 B->ops->solve = MatSolve_SeqSBAIJ_5; 1509 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_5; 1510 B->ops->mult = MatMult_SeqSBAIJ_5; 1511 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1512 break; 1513 case 6: 1514 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1515 B->ops->solve = MatSolve_SeqSBAIJ_6; 1516 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_6; 1517 B->ops->mult = MatMult_SeqSBAIJ_6; 1518 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1519 break; 1520 case 7: 1521 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1522 B->ops->solve = MatSolve_SeqSBAIJ_7; 1523 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_7; 1524 B->ops->mult = MatMult_SeqSBAIJ_7; 1525 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1526 break; 1527 } 1528 } 1529 1530 b->mbs = mbs; 1531 b->nbs = mbs; 1532 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1533 if (!nnz) { 1534 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1535 else if (nz <= 0) nz = 1; 1536 for (i=0; i<mbs; i++) { 1537 b->imax[i] = nz; 1538 } 1539 nz = nz*mbs; /* total nz */ 1540 } else { 1541 nz = 0; 1542 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1543 } 1544 /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1545 s_nz = nz; 1546 1547 /* allocate the matrix space */ 1548 len = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1549 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1550 ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1551 b->j = (int*)(b->a + s_nz*bs2); 1552 ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr); 1553 b->i = b->j + s_nz; 1554 b->singlemalloc = PETSC_TRUE; 1555 1556 /* pointer to beginning of each row */ 1557 b->i[0] = 0; 1558 for (i=1; i<mbs+1; i++) { 1559 b->i[i] = b->i[i-1] + b->imax[i-1]; 1560 } 1561 1562 /* b->ilen will count nonzeros in each block row so far. */ 1563 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1564 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1565 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1566 1567 b->bs = bs; 1568 b->bs2 = bs2; 1569 b->s_nz = 0; 1570 b->s_maxnz = s_nz*bs2; 1571 1572 b->inew = 0; 1573 b->jnew = 0; 1574 b->anew = 0; 1575 b->a2anew = 0; 1576 b->permute = PETSC_FALSE; 1577 PetscFunctionReturn(0); 1578 } 1579 EXTERN_C_END 1580 1581 EXTERN_C_BEGIN 1582 #undef __FUNCT__ 1583 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1584 int MatCreate_SeqSBAIJ(Mat B) 1585 { 1586 Mat_SeqSBAIJ *b; 1587 int ierr,size; 1588 1589 PetscFunctionBegin; 1590 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1591 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1592 B->m = B->M = PetscMax(B->m,B->M); 1593 B->n = B->N = PetscMax(B->n,B->N); 1594 1595 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1596 B->data = (void*)b; 1597 ierr = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1598 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1599 B->ops->destroy = MatDestroy_SeqSBAIJ; 1600 B->ops->view = MatView_SeqSBAIJ; 1601 B->factor = 0; 1602 B->lupivotthreshold = 1.0; 1603 B->mapping = 0; 1604 b->row = 0; 1605 b->icol = 0; 1606 b->reallocs = 0; 1607 b->saved_values = 0; 1608 1609 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1610 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1611 1612 b->sorted = PETSC_FALSE; 1613 b->roworiented = PETSC_TRUE; 1614 b->nonew = 0; 1615 b->diag = 0; 1616 b->solve_work = 0; 1617 b->mult_work = 0; 1618 B->spptr = 0; 1619 b->keepzeroedrows = PETSC_FALSE; 1620 b->xtoy = 0; 1621 b->XtoY = 0; 1622 1623 b->inew = 0; 1624 b->jnew = 0; 1625 b->anew = 0; 1626 b->a2anew = 0; 1627 b->permute = PETSC_FALSE; 1628 1629 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1630 "MatStoreValues_SeqSBAIJ", 1631 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1632 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1633 "MatRetrieveValues_SeqSBAIJ", 1634 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1635 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1636 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1637 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1638 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1639 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1640 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1641 PetscFunctionReturn(0); 1642 } 1643 EXTERN_C_END 1644 1645 #undef __FUNCT__ 1646 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1647 /*@C 1648 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1649 compressed row) format. For good matrix assembly performance the 1650 user should preallocate the matrix storage by setting the parameter nz 1651 (or the array nnz). By setting these parameters accurately, performance 1652 during matrix assembly can be increased by more than a factor of 50. 1653 1654 Collective on Mat 1655 1656 Input Parameters: 1657 + A - the symmetric matrix 1658 . bs - size of block 1659 . nz - number of block nonzeros per block row (same for all rows) 1660 - nnz - array containing the number of block nonzeros in the upper triangular plus 1661 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1662 1663 Options Database Keys: 1664 . -mat_no_unroll - uses code that does not unroll the loops in the 1665 block calculations (much slower) 1666 . -mat_block_size - size of the blocks to use 1667 1668 Level: intermediate 1669 1670 Notes: 1671 Specify the preallocated storage with either nz or nnz (not both). 1672 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1673 allocation. For additional details, see the users manual chapter on 1674 matrices. 1675 1676 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1677 @*/ 1678 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) { 1679 int ierr,(*f)(Mat,int,int,const int[]); 1680 1681 PetscFunctionBegin; 1682 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1683 if (f) { 1684 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1685 } 1686 PetscFunctionReturn(0); 1687 } 1688 1689 #undef __FUNCT__ 1690 #define __FUNCT__ "MatCreateSeqSBAIJ" 1691 /*@C 1692 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1693 compressed row) format. For good matrix assembly performance the 1694 user should preallocate the matrix storage by setting the parameter nz 1695 (or the array nnz). By setting these parameters accurately, performance 1696 during matrix assembly can be increased by more than a factor of 50. 1697 1698 Collective on MPI_Comm 1699 1700 Input Parameters: 1701 + comm - MPI communicator, set to PETSC_COMM_SELF 1702 . bs - size of block 1703 . m - number of rows, or number of columns 1704 . nz - number of block nonzeros per block row (same for all rows) 1705 - nnz - array containing the number of block nonzeros in the upper triangular plus 1706 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1707 1708 Output Parameter: 1709 . A - the symmetric matrix 1710 1711 Options Database Keys: 1712 . -mat_no_unroll - uses code that does not unroll the loops in the 1713 block calculations (much slower) 1714 . -mat_block_size - size of the blocks to use 1715 1716 Level: intermediate 1717 1718 Notes: 1719 1720 Specify the preallocated storage with either nz or nnz (not both). 1721 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1722 allocation. For additional details, see the users manual chapter on 1723 matrices. 1724 1725 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1726 @*/ 1727 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A) 1728 { 1729 int ierr; 1730 1731 PetscFunctionBegin; 1732 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1733 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1734 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1735 PetscFunctionReturn(0); 1736 } 1737 1738 #undef __FUNCT__ 1739 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1740 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1741 { 1742 Mat C; 1743 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1744 int i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr; 1745 1746 PetscFunctionBegin; 1747 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1748 1749 *B = 0; 1750 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1751 ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr); 1752 c = (Mat_SeqSBAIJ*)C->data; 1753 1754 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1755 C->preallocated = PETSC_TRUE; 1756 C->factor = A->factor; 1757 c->row = 0; 1758 c->icol = 0; 1759 c->saved_values = 0; 1760 c->keepzeroedrows = a->keepzeroedrows; 1761 C->assembled = PETSC_TRUE; 1762 1763 c->bs = a->bs; 1764 c->bs2 = a->bs2; 1765 c->mbs = a->mbs; 1766 c->nbs = a->nbs; 1767 1768 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1769 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1770 for (i=0; i<mbs; i++) { 1771 c->imax[i] = a->imax[i]; 1772 c->ilen[i] = a->ilen[i]; 1773 } 1774 1775 /* allocate the matrix space */ 1776 c->singlemalloc = PETSC_TRUE; 1777 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1778 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1779 c->j = (int*)(c->a + nz*bs2); 1780 c->i = c->j + nz; 1781 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1782 if (mbs > 0) { 1783 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1784 if (cpvalues == MAT_COPY_VALUES) { 1785 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1786 } else { 1787 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1788 } 1789 } 1790 1791 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1792 c->sorted = a->sorted; 1793 c->roworiented = a->roworiented; 1794 c->nonew = a->nonew; 1795 1796 if (a->diag) { 1797 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1798 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1799 for (i=0; i<mbs; i++) { 1800 c->diag[i] = a->diag[i]; 1801 } 1802 } else c->diag = 0; 1803 c->s_nz = a->s_nz; 1804 c->s_maxnz = a->s_maxnz; 1805 c->solve_work = 0; 1806 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1807 c->mult_work = 0; 1808 *B = C; 1809 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1810 PetscFunctionReturn(0); 1811 } 1812 1813 EXTERN_C_BEGIN 1814 #undef __FUNCT__ 1815 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1816 int MatLoad_SeqSBAIJ(PetscViewer viewer,MatType type,Mat *A) 1817 { 1818 Mat_SeqSBAIJ *a; 1819 Mat B; 1820 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1821 int *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1822 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1823 int *masked,nmask,tmp,bs2,ishift; 1824 PetscScalar *aa; 1825 MPI_Comm comm = ((PetscObject)viewer)->comm; 1826 #if defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_MUMPS) 1827 PetscTruth flag; 1828 #endif 1829 1830 PetscFunctionBegin; 1831 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1832 bs2 = bs*bs; 1833 1834 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1835 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1836 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1837 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1838 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1839 M = header[1]; N = header[2]; nz = header[3]; 1840 1841 if (header[3] < 0) { 1842 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1843 } 1844 1845 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1846 1847 /* 1848 This code adds extra rows to make sure the number of rows is 1849 divisible by the blocksize 1850 */ 1851 mbs = M/bs; 1852 extra_rows = bs - M + bs*(mbs); 1853 if (extra_rows == bs) extra_rows = 0; 1854 else mbs++; 1855 if (extra_rows) { 1856 PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"); 1857 } 1858 1859 /* read in row lengths */ 1860 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1861 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1862 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1863 1864 /* read in column indices */ 1865 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1866 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1867 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1868 1869 /* loop over row lengths determining block row lengths */ 1870 ierr = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr); 1871 ierr = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1872 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1873 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1874 masked = mask + mbs; 1875 rowcount = 0; nzcount = 0; 1876 for (i=0; i<mbs; i++) { 1877 nmask = 0; 1878 for (j=0; j<bs; j++) { 1879 kmax = rowlengths[rowcount]; 1880 for (k=0; k<kmax; k++) { 1881 tmp = jj[nzcount++]/bs; /* block col. index */ 1882 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1883 } 1884 rowcount++; 1885 } 1886 s_browlengths[i] += nmask; 1887 1888 /* zero out the mask elements we set */ 1889 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1890 } 1891 1892 /* create our matrix */ 1893 ierr = MatCreateSeqSBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,s_browlengths,A);CHKERRQ(ierr); 1894 B = *A; 1895 a = (Mat_SeqSBAIJ*)B->data; 1896 1897 /* set matrix "i" values */ 1898 a->i[0] = 0; 1899 for (i=1; i<= mbs; i++) { 1900 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1901 a->ilen[i-1] = s_browlengths[i-1]; 1902 } 1903 a->s_nz = a->i[mbs]; 1904 1905 /* read in nonzero values */ 1906 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1907 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1908 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1909 1910 /* set "a" and "j" values into matrix */ 1911 nzcount = 0; jcount = 0; 1912 for (i=0; i<mbs; i++) { 1913 nzcountb = nzcount; 1914 nmask = 0; 1915 for (j=0; j<bs; j++) { 1916 kmax = rowlengths[i*bs+j]; 1917 for (k=0; k<kmax; k++) { 1918 tmp = jj[nzcount++]/bs; /* block col. index */ 1919 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1920 } 1921 } 1922 /* sort the masked values */ 1923 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1924 1925 /* set "j" values into matrix */ 1926 maskcount = 1; 1927 for (j=0; j<nmask; j++) { 1928 a->j[jcount++] = masked[j]; 1929 mask[masked[j]] = maskcount++; 1930 } 1931 1932 /* set "a" values into matrix */ 1933 ishift = bs2*a->i[i]; 1934 for (j=0; j<bs; j++) { 1935 kmax = rowlengths[i*bs+j]; 1936 for (k=0; k<kmax; k++) { 1937 tmp = jj[nzcountb]/bs ; /* block col. index */ 1938 if (tmp >= i){ 1939 block = mask[tmp] - 1; 1940 point = jj[nzcountb] - bs*tmp; 1941 idx = ishift + bs2*block + j + bs*point; 1942 a->a[idx] = aa[nzcountb]; 1943 } 1944 nzcountb++; 1945 } 1946 } 1947 /* zero out the mask elements we set */ 1948 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1949 } 1950 if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1951 1952 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1953 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1954 ierr = PetscFree(aa);CHKERRQ(ierr); 1955 ierr = PetscFree(jj);CHKERRQ(ierr); 1956 ierr = PetscFree(mask);CHKERRQ(ierr); 1957 1958 B->assembled = PETSC_TRUE; 1959 #if defined(PETSC_HAVE_SPOOLES) 1960 ierr = PetscOptionsHasName(B->prefix,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr); 1961 if (flag) { ierr = MatUseSpooles_SeqSBAIJ(B);CHKERRQ(ierr); } 1962 #endif 1963 #if defined(PETSC_HAVE_MUMPS) 1964 ierr = PetscOptionsHasName(B->prefix,"-mat_sbaij_mumps",&flag);CHKERRQ(ierr); 1965 if (flag) { ierr = MatUseMUMPS_MPIAIJ(B);CHKERRQ(ierr); } 1966 #endif 1967 ierr = MatView_Private(B);CHKERRQ(ierr); 1968 PetscFunctionReturn(0); 1969 } 1970 EXTERN_C_END 1971 1972 #undef __FUNCT__ 1973 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1974 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1975 { 1976 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1977 MatScalar *aa=a->a,*v,*v1; 1978 PetscScalar *x,*b,*t,sum,d; 1979 int m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr; 1980 int nz,nz1,*vj,*vj1,i; 1981 1982 PetscFunctionBegin; 1983 its = its*lits; 1984 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1985 1986 if (bs > 1) 1987 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1988 1989 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1990 if (xx != bb) { 1991 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1992 } else { 1993 b = x; 1994 } 1995 1996 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1997 1998 if (flag & SOR_ZERO_INITIAL_GUESS) { 1999 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2000 for (i=0; i<m; i++) 2001 t[i] = b[i]; 2002 2003 for (i=0; i<m; i++){ 2004 d = *(aa + ai[i]); /* diag[i] */ 2005 v = aa + ai[i] + 1; 2006 vj = aj + ai[i] + 1; 2007 nz = ai[i+1] - ai[i] - 1; 2008 x[i] = omega*t[i]/d; 2009 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 2010 PetscLogFlops(2*nz-1); 2011 } 2012 } 2013 2014 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2015 for (i=0; i<m; i++) 2016 t[i] = b[i]; 2017 2018 for (i=0; i<m-1; i++){ /* update rhs */ 2019 v = aa + ai[i] + 1; 2020 vj = aj + ai[i] + 1; 2021 nz = ai[i+1] - ai[i] - 1; 2022 while (nz--) t[*vj++] -= x[i]*(*v++); 2023 PetscLogFlops(2*nz-1); 2024 } 2025 for (i=m-1; i>=0; i--){ 2026 d = *(aa + ai[i]); 2027 v = aa + ai[i] + 1; 2028 vj = aj + ai[i] + 1; 2029 nz = ai[i+1] - ai[i] - 1; 2030 sum = t[i]; 2031 while (nz--) sum -= x[*vj++]*(*v++); 2032 PetscLogFlops(2*nz-1); 2033 x[i] = (1-omega)*x[i] + omega*sum/d; 2034 } 2035 } 2036 its--; 2037 } 2038 2039 while (its--) { 2040 /* 2041 forward sweep: 2042 for i=0,...,m-1: 2043 sum[i] = (b[i] - U(i,:)x )/d[i]; 2044 x[i] = (1-omega)x[i] + omega*sum[i]; 2045 b = b - x[i]*U^T(i,:); 2046 2047 */ 2048 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2049 for (i=0; i<m; i++) 2050 t[i] = b[i]; 2051 2052 for (i=0; i<m; i++){ 2053 d = *(aa + ai[i]); /* diag[i] */ 2054 v = aa + ai[i] + 1; v1=v; 2055 vj = aj + ai[i] + 1; vj1=vj; 2056 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2057 sum = t[i]; 2058 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2059 x[i] = (1-omega)*x[i] + omega*sum/d; 2060 while (nz--) t[*vj++] -= x[i]*(*v++); 2061 PetscLogFlops(4*nz-2); 2062 } 2063 } 2064 2065 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2066 /* 2067 backward sweep: 2068 b = b - x[i]*U^T(i,:), i=0,...,n-2 2069 for i=m-1,...,0: 2070 sum[i] = (b[i] - U(i,:)x )/d[i]; 2071 x[i] = (1-omega)x[i] + omega*sum[i]; 2072 */ 2073 for (i=0; i<m; i++) 2074 t[i] = b[i]; 2075 2076 for (i=0; i<m-1; i++){ /* update rhs */ 2077 v = aa + ai[i] + 1; 2078 vj = aj + ai[i] + 1; 2079 nz = ai[i+1] - ai[i] - 1; 2080 while (nz--) t[*vj++] -= x[i]*(*v++); 2081 PetscLogFlops(2*nz-1); 2082 } 2083 for (i=m-1; i>=0; i--){ 2084 d = *(aa + ai[i]); 2085 v = aa + ai[i] + 1; 2086 vj = aj + ai[i] + 1; 2087 nz = ai[i+1] - ai[i] - 1; 2088 sum = t[i]; 2089 while (nz--) sum -= x[*vj++]*(*v++); 2090 PetscLogFlops(2*nz-1); 2091 x[i] = (1-omega)*x[i] + omega*sum/d; 2092 } 2093 } 2094 } 2095 2096 ierr = PetscFree(t); CHKERRQ(ierr); 2097 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2098 if (bb != xx) { 2099 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2100 } 2101 2102 PetscFunctionReturn(0); 2103 } 2104 2105 2106 2107 2108 2109 2110