1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "../src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "../src/mat/impls/sbaij/seq/sbaij.h" 9 10 extern PetscErrorCode MatSeqSBAIJSetNumericFactorization(Mat,PetscTruth); 11 #define CHUNKSIZE 10 12 13 /* 14 Checks for missing diagonals 15 */ 16 #undef __FUNCT__ 17 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 18 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A,PetscTruth *missing,PetscInt *dd) 19 { 20 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 21 PetscErrorCode ierr; 22 PetscInt *diag,*jj = a->j,i; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 *missing = PETSC_FALSE; 28 for (i=0; i<a->mbs; i++) { 29 if (jj[diag[i]] != i) { 30 *missing = PETSC_TRUE; 31 if (dd) *dd = i; 32 break; 33 } 34 } 35 PetscFunctionReturn(0); 36 } 37 38 #undef __FUNCT__ 39 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 40 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A) 41 { 42 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 43 PetscErrorCode ierr; 44 PetscInt i; 45 46 PetscFunctionBegin; 47 if (!a->diag) { 48 ierr = PetscMalloc(a->mbs*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 49 } 50 for (i=0; i<a->mbs; i++) a->diag[i] = a->i[i]; 51 PetscFunctionReturn(0); 52 } 53 54 #undef __FUNCT__ 55 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 56 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 57 { 58 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 59 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs; 60 PetscErrorCode ierr; 61 62 PetscFunctionBegin; 63 *nn = n; 64 if (!ia) PetscFunctionReturn(0); 65 if (!blockcompressed) { 66 /* malloc & create the natural set of indices */ 67 ierr = PetscMalloc2((n+1)*bs,PetscInt,ia,nz*bs,PetscInt,ja);CHKERRQ(ierr); 68 for (i=0; i<n+1; i++) { 69 for (j=0; j<bs; j++) { 70 *ia[i*bs+j] = a->i[i]*bs+j+oshift; 71 } 72 } 73 for (i=0; i<nz; i++) { 74 for (j=0; j<bs; j++) { 75 *ja[i*bs+j] = a->j[i]*bs+j+oshift; 76 } 77 } 78 } else { /* blockcompressed */ 79 if (oshift == 1) { 80 /* temporarily add 1 to i and j indices */ 81 for (i=0; i<nz; i++) a->j[i]++; 82 for (i=0; i<n+1; i++) a->i[i]++; 83 } 84 *ia = a->i; *ja = a->j; 85 } 86 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 92 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 93 { 94 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 95 PetscInt i,n = a->mbs,nz = a->i[n]; 96 PetscErrorCode ierr; 97 98 PetscFunctionBegin; 99 if (!ia) PetscFunctionReturn(0); 100 101 if (!blockcompressed) { 102 ierr = PetscFree2(*ia,*ja);CHKERRQ(ierr); 103 } else if (oshift == 1) { /* blockcompressed */ 104 for (i=0; i<nz; i++) a->j[i]--; 105 for (i=0; i<n+1; i++) a->i[i]--; 106 } 107 108 PetscFunctionReturn(0); 109 } 110 111 #undef __FUNCT__ 112 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 113 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A) 114 { 115 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 116 PetscErrorCode ierr; 117 118 PetscFunctionBegin; 119 #if defined(PETSC_USE_LOG) 120 PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->rmap->N,a->nz); 121 #endif 122 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 123 if (a->row) {ierr = ISDestroy(a->row);CHKERRQ(ierr);} 124 if (a->col){ierr = ISDestroy(a->col);CHKERRQ(ierr);} 125 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 126 if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);} 127 if (a->inode.size) {ierr = PetscFree(a->inode.size);CHKERRQ(ierr);} 128 ierr = PetscFree(a->diag);CHKERRQ(ierr); 129 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 130 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 131 ierr = PetscFree(a->relax_work);CHKERRQ(ierr); 132 ierr = PetscFree(a->solves_work);CHKERRQ(ierr); 133 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 134 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 135 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 136 ierr = PetscFree(a->jshort);CHKERRQ(ierr); 137 ierr = PetscFree(a->inew);CHKERRQ(ierr); 138 ierr = PetscFree(a);CHKERRQ(ierr); 139 140 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 141 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 142 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 143 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 144 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 145 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr); 146 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 147 PetscFunctionReturn(0); 148 } 149 150 #undef __FUNCT__ 151 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 152 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op,PetscTruth flg) 153 { 154 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 155 PetscErrorCode ierr; 156 157 PetscFunctionBegin; 158 switch (op) { 159 case MAT_ROW_ORIENTED: 160 a->roworiented = flg; 161 break; 162 case MAT_KEEP_NONZERO_PATTERN: 163 a->keepnonzeropattern = flg; 164 break; 165 case MAT_NEW_NONZERO_LOCATIONS: 166 a->nonew = (flg ? 0 : 1); 167 break; 168 case MAT_NEW_NONZERO_LOCATION_ERR: 169 a->nonew = (flg ? -1 : 0); 170 break; 171 case MAT_NEW_NONZERO_ALLOCATION_ERR: 172 a->nonew = (flg ? -2 : 0); 173 break; 174 case MAT_UNUSED_NONZERO_LOCATION_ERR: 175 a->nounused = (flg ? -1 : 0); 176 break; 177 case MAT_NEW_DIAGONALS: 178 case MAT_IGNORE_OFF_PROC_ENTRIES: 179 case MAT_USE_HASH_TABLE: 180 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 181 break; 182 case MAT_HERMITIAN: 183 if (flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 184 case MAT_SYMMETRIC: 185 case MAT_STRUCTURALLY_SYMMETRIC: 186 case MAT_SYMMETRY_ETERNAL: 187 if (!flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 188 ierr = PetscInfo1(A,"Option %s not relevent\n",MatOptions[op]);CHKERRQ(ierr); 189 break; 190 case MAT_IGNORE_LOWER_TRIANGULAR: 191 a->ignore_ltriangular = flg; 192 break; 193 case MAT_ERROR_LOWER_TRIANGULAR: 194 a->ignore_ltriangular = flg; 195 break; 196 case MAT_GETROW_UPPERTRIANGULAR: 197 a->getrow_utriangular = flg; 198 break; 199 default: 200 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 201 } 202 PetscFunctionReturn(0); 203 } 204 205 #undef __FUNCT__ 206 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 207 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v) 208 { 209 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 210 PetscErrorCode ierr; 211 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 212 MatScalar *aa,*aa_i; 213 PetscScalar *v_i; 214 215 PetscFunctionBegin; 216 if (A && !a->getrow_utriangular) SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format. Getting the upper triangular part of row, run with -mat_getrow_uppertriangular, call MatSetOption(mat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE) or MatGetRowUpperTriangular()"); 217 /* Get the upper triangular part of the row */ 218 bs = A->rmap->bs; 219 ai = a->i; 220 aj = a->j; 221 aa = a->a; 222 bs2 = a->bs2; 223 224 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row); 225 226 bn = row/bs; /* Block number */ 227 bp = row % bs; /* Block position */ 228 M = ai[bn+1] - ai[bn]; 229 *ncols = bs*M; 230 231 if (v) { 232 *v = 0; 233 if (*ncols) { 234 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 235 for (i=0; i<M; i++) { /* for each block in the block row */ 236 v_i = *v + i*bs; 237 aa_i = aa + bs2*(ai[bn] + i); 238 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 239 } 240 } 241 } 242 243 if (cols) { 244 *cols = 0; 245 if (*ncols) { 246 ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr); 247 for (i=0; i<M; i++) { /* for each block in the block row */ 248 cols_i = *cols + i*bs; 249 itmp = bs*aj[ai[bn] + i]; 250 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 251 } 252 } 253 } 254 255 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 256 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 257 #ifdef column_search 258 v_i = *v + M*bs; 259 cols_i = *cols + M*bs; 260 for (i=0; i<bn; i++){ /* for each block row */ 261 M = ai[i+1] - ai[i]; 262 for (j=0; j<M; j++){ 263 itmp = aj[ai[i] + j]; /* block column value */ 264 if (itmp == bn){ 265 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 266 for (k=0; k<bs; k++) { 267 *cols_i++ = i*bs+k; 268 *v_i++ = aa_i[k]; 269 } 270 *ncols += bs; 271 break; 272 } 273 } 274 } 275 #endif 276 PetscFunctionReturn(0); 277 } 278 279 #undef __FUNCT__ 280 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 281 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 282 { 283 PetscErrorCode ierr; 284 285 PetscFunctionBegin; 286 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 287 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 288 PetscFunctionReturn(0); 289 } 290 291 #undef __FUNCT__ 292 #define __FUNCT__ "MatGetRowUpperTriangular_SeqSBAIJ" 293 PetscErrorCode MatGetRowUpperTriangular_SeqSBAIJ(Mat A) 294 { 295 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 296 297 PetscFunctionBegin; 298 a->getrow_utriangular = PETSC_TRUE; 299 PetscFunctionReturn(0); 300 } 301 #undef __FUNCT__ 302 #define __FUNCT__ "MatRestoreRowUpperTriangular_SeqSBAIJ" 303 PetscErrorCode MatRestoreRowUpperTriangular_SeqSBAIJ(Mat A) 304 { 305 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 306 307 PetscFunctionBegin; 308 a->getrow_utriangular = PETSC_FALSE; 309 PetscFunctionReturn(0); 310 } 311 312 #undef __FUNCT__ 313 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 314 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,MatReuse reuse,Mat *B) 315 { 316 PetscErrorCode ierr; 317 PetscFunctionBegin; 318 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 319 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 320 } 321 PetscFunctionReturn(0); 322 } 323 324 #undef __FUNCT__ 325 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 326 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 327 { 328 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 329 PetscErrorCode ierr; 330 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 331 const char *name; 332 PetscViewerFormat format; 333 334 PetscFunctionBegin; 335 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 336 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 337 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 338 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 339 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 340 Mat aij; 341 342 if (A->factor && bs>1){ 343 ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: matrix is factored with bs>1. MatView() with PETSC_VIEWER_ASCII_MATLAB is not supported and ignored!\n");CHKERRQ(ierr); 344 PetscFunctionReturn(0); 345 } 346 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 347 ierr = MatView(aij,viewer);CHKERRQ(ierr); 348 ierr = MatDestroy(aij);CHKERRQ(ierr); 349 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 350 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 351 for (i=0; i<a->mbs; i++) { 352 for (j=0; j<bs; j++) { 353 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 354 for (k=a->i[i]; k<a->i[i+1]; k++) { 355 for (l=0; l<bs; l++) { 356 #if defined(PETSC_USE_COMPLEX) 357 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 358 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 359 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 360 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 361 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 362 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 363 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 364 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 365 } 366 #else 367 if (a->a[bs2*k + l*bs + j] != 0.0) { 368 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 369 } 370 #endif 371 } 372 } 373 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 374 } 375 } 376 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 377 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 378 PetscFunctionReturn(0); 379 } else { 380 if (A->factor && bs>1){ 381 ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: matrix is factored. MatView_SeqSBAIJ_ASCII() may not display complete or logically correct entries!\n");CHKERRQ(ierr); 382 } 383 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 384 for (i=0; i<a->mbs; i++) { 385 for (j=0; j<bs; j++) { 386 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 387 for (k=a->i[i]; k<a->i[i+1]; k++) { 388 for (l=0; l<bs; l++) { 389 #if defined(PETSC_USE_COMPLEX) 390 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 391 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 392 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 393 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 394 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 395 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 396 } else { 397 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 398 } 399 #else 400 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 401 #endif 402 } 403 } 404 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 405 } 406 } 407 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 408 } 409 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 410 PetscFunctionReturn(0); 411 } 412 413 #undef __FUNCT__ 414 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 415 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 416 { 417 Mat A = (Mat) Aa; 418 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 419 PetscErrorCode ierr; 420 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 421 PetscMPIInt rank; 422 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 423 MatScalar *aa; 424 MPI_Comm comm; 425 PetscViewer viewer; 426 427 PetscFunctionBegin; 428 /* 429 This is nasty. If this is called from an originally parallel matrix 430 then all processes call this,but only the first has the matrix so the 431 rest should return immediately. 432 */ 433 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 434 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 435 if (rank) PetscFunctionReturn(0); 436 437 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 438 439 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 440 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 441 442 /* loop over matrix elements drawing boxes */ 443 color = PETSC_DRAW_BLUE; 444 for (i=0,row=0; i<mbs; i++,row+=bs) { 445 for (j=a->i[i]; j<a->i[i+1]; j++) { 446 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 447 x_l = a->j[j]*bs; x_r = x_l + 1.0; 448 aa = a->a + j*bs2; 449 for (k=0; k<bs; k++) { 450 for (l=0; l<bs; l++) { 451 if (PetscRealPart(*aa++) >= 0.) continue; 452 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 453 } 454 } 455 } 456 } 457 color = PETSC_DRAW_CYAN; 458 for (i=0,row=0; i<mbs; i++,row+=bs) { 459 for (j=a->i[i]; j<a->i[i+1]; j++) { 460 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 461 x_l = a->j[j]*bs; x_r = x_l + 1.0; 462 aa = a->a + j*bs2; 463 for (k=0; k<bs; k++) { 464 for (l=0; l<bs; l++) { 465 if (PetscRealPart(*aa++) != 0.) continue; 466 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 467 } 468 } 469 } 470 } 471 472 color = PETSC_DRAW_RED; 473 for (i=0,row=0; i<mbs; i++,row+=bs) { 474 for (j=a->i[i]; j<a->i[i+1]; j++) { 475 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 476 x_l = a->j[j]*bs; x_r = x_l + 1.0; 477 aa = a->a + j*bs2; 478 for (k=0; k<bs; k++) { 479 for (l=0; l<bs; l++) { 480 if (PetscRealPart(*aa++) <= 0.) continue; 481 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 482 } 483 } 484 } 485 } 486 PetscFunctionReturn(0); 487 } 488 489 #undef __FUNCT__ 490 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 491 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 492 { 493 PetscErrorCode ierr; 494 PetscReal xl,yl,xr,yr,w,h; 495 PetscDraw draw; 496 PetscTruth isnull; 497 498 PetscFunctionBegin; 499 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 500 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 501 502 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 503 xr = A->rmap->N; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 504 xr += w; yr += h; xl = -w; yl = -h; 505 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 506 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 507 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 508 PetscFunctionReturn(0); 509 } 510 511 #undef __FUNCT__ 512 #define __FUNCT__ "MatView_SeqSBAIJ" 513 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 514 { 515 PetscErrorCode ierr; 516 PetscTruth iascii,isdraw; 517 518 PetscFunctionBegin; 519 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 520 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 521 if (iascii){ 522 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 523 } else if (isdraw) { 524 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 525 } else { 526 Mat B; 527 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 528 ierr = MatView(B,viewer);CHKERRQ(ierr); 529 ierr = MatDestroy(B);CHKERRQ(ierr); 530 } 531 PetscFunctionReturn(0); 532 } 533 534 535 #undef __FUNCT__ 536 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 537 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 538 { 539 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 540 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 541 PetscInt *ai = a->i,*ailen = a->ilen; 542 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 543 MatScalar *ap,*aa = a->a; 544 545 PetscFunctionBegin; 546 for (k=0; k<m; k++) { /* loop over rows */ 547 row = im[k]; brow = row/bs; 548 if (row < 0) {v += n; continue;} /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); */ 549 if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 550 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 551 nrow = ailen[brow]; 552 for (l=0; l<n; l++) { /* loop over columns */ 553 if (in[l] < 0) {v++; continue;} /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); */ 554 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 555 col = in[l] ; 556 bcol = col/bs; 557 cidx = col%bs; 558 ridx = row%bs; 559 high = nrow; 560 low = 0; /* assume unsorted */ 561 while (high-low > 5) { 562 t = (low+high)/2; 563 if (rp[t] > bcol) high = t; 564 else low = t; 565 } 566 for (i=low; i<high; i++) { 567 if (rp[i] > bcol) break; 568 if (rp[i] == bcol) { 569 *v++ = ap[bs2*i+bs*cidx+ridx]; 570 goto finished; 571 } 572 } 573 *v++ = 0.0; 574 finished:; 575 } 576 } 577 PetscFunctionReturn(0); 578 } 579 580 581 #undef __FUNCT__ 582 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 583 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 584 { 585 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 586 PetscErrorCode ierr; 587 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 588 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 589 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 590 PetscTruth roworiented=a->roworiented; 591 const PetscScalar *value = v; 592 MatScalar *ap,*aa = a->a,*bap; 593 594 PetscFunctionBegin; 595 if (roworiented) { 596 stepval = (n-1)*bs; 597 } else { 598 stepval = (m-1)*bs; 599 } 600 for (k=0; k<m; k++) { /* loop over added rows */ 601 row = im[k]; 602 if (row < 0) continue; 603 #if defined(PETSC_USE_DEBUG) 604 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 605 #endif 606 rp = aj + ai[row]; 607 ap = aa + bs2*ai[row]; 608 rmax = imax[row]; 609 nrow = ailen[row]; 610 low = 0; 611 high = nrow; 612 for (l=0; l<n; l++) { /* loop over added columns */ 613 if (in[l] < 0) continue; 614 col = in[l]; 615 #if defined(PETSC_USE_DEBUG) 616 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1); 617 #endif 618 if (col < row) continue; /* ignore lower triangular block */ 619 if (roworiented) { 620 value = v + k*(stepval+bs)*bs + l*bs; 621 } else { 622 value = v + l*(stepval+bs)*bs + k*bs; 623 } 624 if (col <= lastcol) low = 0; else high = nrow; 625 lastcol = col; 626 while (high-low > 7) { 627 t = (low+high)/2; 628 if (rp[t] > col) high = t; 629 else low = t; 630 } 631 for (i=low; i<high; i++) { 632 if (rp[i] > col) break; 633 if (rp[i] == col) { 634 bap = ap + bs2*i; 635 if (roworiented) { 636 if (is == ADD_VALUES) { 637 for (ii=0; ii<bs; ii++,value+=stepval) { 638 for (jj=ii; jj<bs2; jj+=bs) { 639 bap[jj] += *value++; 640 } 641 } 642 } else { 643 for (ii=0; ii<bs; ii++,value+=stepval) { 644 for (jj=ii; jj<bs2; jj+=bs) { 645 bap[jj] = *value++; 646 } 647 } 648 } 649 } else { 650 if (is == ADD_VALUES) { 651 for (ii=0; ii<bs; ii++,value+=stepval) { 652 for (jj=0; jj<bs; jj++) { 653 *bap++ += *value++; 654 } 655 } 656 } else { 657 for (ii=0; ii<bs; ii++,value+=stepval) { 658 for (jj=0; jj<bs; jj++) { 659 *bap++ = *value++; 660 } 661 } 662 } 663 } 664 goto noinsert2; 665 } 666 } 667 if (nonew == 1) goto noinsert2; 668 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 669 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 670 N = nrow++ - 1; high++; 671 /* shift up all the later entries in this row */ 672 for (ii=N; ii>=i; ii--) { 673 rp[ii+1] = rp[ii]; 674 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 675 } 676 if (N >= i) { 677 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 678 } 679 rp[i] = col; 680 bap = ap + bs2*i; 681 if (roworiented) { 682 for (ii=0; ii<bs; ii++,value+=stepval) { 683 for (jj=ii; jj<bs2; jj+=bs) { 684 bap[jj] = *value++; 685 } 686 } 687 } else { 688 for (ii=0; ii<bs; ii++,value+=stepval) { 689 for (jj=0; jj<bs; jj++) { 690 *bap++ = *value++; 691 } 692 } 693 } 694 noinsert2:; 695 low = i; 696 } 697 ailen[row] = nrow; 698 } 699 PetscFunctionReturn(0); 700 } 701 702 /* 703 This is not yet used 704 */ 705 #undef __FUNCT__ 706 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ_Inode" 707 PetscErrorCode MatAssemblyEnd_SeqSBAIJ_Inode(Mat A) 708 { 709 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 710 PetscErrorCode ierr; 711 const PetscInt *ai = a->i, *aj = a->j,*cols; 712 PetscInt i = 0,j,blk_size,m = A->rmap->n,node_count = 0,nzx,nzy,*ns,row,nz,cnt,cnt2,*counts; 713 PetscTruth flag; 714 715 PetscFunctionBegin; 716 ierr = PetscMalloc(m*sizeof(PetscInt),&ns);CHKERRQ(ierr); 717 while (i < m){ 718 nzx = ai[i+1] - ai[i]; /* Number of nonzeros */ 719 /* Limits the number of elements in a node to 'a->inode.limit' */ 720 for (j=i+1,blk_size=1; j<m && blk_size <a->inode.limit; ++j,++blk_size) { 721 nzy = ai[j+1] - ai[j]; 722 if (nzy != (nzx - j + i)) break; 723 ierr = PetscMemcmp(aj + ai[i] + j - i,aj + ai[j],nzy*sizeof(PetscInt),&flag);CHKERRQ(ierr); 724 if (!flag) break; 725 } 726 ns[node_count++] = blk_size; 727 i = j; 728 } 729 if (!a->inode.size && m && node_count > .9*m) { 730 ierr = PetscFree(ns);CHKERRQ(ierr); 731 ierr = PetscInfo2(A,"Found %D nodes out of %D rows. Not using Inode routines\n",node_count,m);CHKERRQ(ierr); 732 } else { 733 a->inode.node_count = node_count; 734 ierr = PetscMalloc(node_count*sizeof(PetscInt),&a->inode.size);CHKERRQ(ierr); 735 ierr = PetscMemcpy(a->inode.size,ns,node_count*sizeof(PetscInt)); 736 ierr = PetscFree(ns);CHKERRQ(ierr); 737 ierr = PetscInfo3(A,"Found %D nodes of %D. Limit used: %D. Using Inode routines\n",node_count,m,a->inode.limit);CHKERRQ(ierr); 738 739 /* count collections of adjacent columns in each inode */ 740 row = 0; 741 cnt = 0; 742 for (i=0; i<node_count; i++) { 743 cols = aj + ai[row] + a->inode.size[i]; 744 nz = ai[row+1] - ai[row] - a->inode.size[i]; 745 for (j=1; j<nz; j++) { 746 if (cols[j] != cols[j-1]+1) { 747 cnt++; 748 } 749 } 750 cnt++; 751 row += a->inode.size[i]; 752 } 753 ierr = PetscMalloc(2*cnt*sizeof(PetscInt),&counts);CHKERRQ(ierr); 754 cnt = 0; 755 row = 0; 756 for (i=0; i<node_count; i++) { 757 cols = aj + ai[row] + a->inode.size[i]; 758 CHKMEMQ; 759 counts[2*cnt] = cols[0]; 760 CHKMEMQ; 761 nz = ai[row+1] - ai[row] - a->inode.size[i]; 762 cnt2 = 1; 763 for (j=1; j<nz; j++) { 764 if (cols[j] != cols[j-1]+1) { 765 CHKMEMQ; 766 counts[2*(cnt++)+1] = cnt2; 767 counts[2*cnt] = cols[j]; 768 CHKMEMQ; 769 cnt2 = 1; 770 } else cnt2++; 771 } 772 CHKMEMQ; 773 counts[2*(cnt++)+1] = cnt2; 774 CHKMEMQ; 775 row += a->inode.size[i]; 776 } 777 ierr = PetscIntView(2*cnt,counts,0); 778 } 779 780 781 PetscFunctionReturn(0); 782 } 783 784 #undef __FUNCT__ 785 #define __FUNCT__ "MatRelax_SeqSBAIJ_1_ushort" 786 PetscErrorCode MatRelax_SeqSBAIJ_1_ushort(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 787 { 788 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 789 const MatScalar *aa=a->a,*v,*v1,*aidiag; 790 PetscScalar *x,*b,*t,sum; 791 MatScalar tmp; 792 PetscErrorCode ierr; 793 PetscInt m=a->mbs,bs=A->rmap->bs,j; 794 const PetscInt *ai=a->i; 795 const unsigned short *aj=a->jshort,*vj,*vj1; 796 PetscInt nz,nz1,i; 797 798 PetscFunctionBegin; 799 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 800 801 its = its*lits; 802 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 803 804 if (bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 805 806 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 807 if (xx != bb) { 808 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 809 } else { 810 b = x; 811 } 812 813 if (!a->idiagvalid) { 814 if (!a->idiag) { 815 ierr = PetscMalloc(m*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 816 } 817 for (i=0; i<a->mbs; i++) a->idiag[i] = 1.0/a->a[a->i[i]]; 818 a->idiagvalid = PETSC_TRUE; 819 } 820 821 if (!a->relax_work) { 822 ierr = PetscMalloc(m*sizeof(PetscScalar),&a->relax_work);CHKERRQ(ierr); 823 } 824 t = a->relax_work; 825 826 aidiag = a->idiag; 827 if (flag & SOR_ZERO_INITIAL_GUESS) { 828 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 829 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 830 831 v = aa + 1; 832 vj = aj + 1; 833 for (i=0; i<m; i++){ 834 nz = ai[i+1] - ai[i] - 1; 835 tmp = - (x[i] = omega*t[i]*aidiag[i]); 836 for (j=0; j<nz; j++) { 837 t[vj[j]] += tmp*v[j]; 838 } 839 v += nz + 1; 840 vj += nz + 1; 841 } 842 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 843 } 844 845 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 846 int nz2; 847 if (!(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)){ 848 t = b; 849 } 850 851 v = aa + ai[m-1] + 1; 852 vj = aj + ai[m-1] + 1; 853 nz = 0; 854 for (i=m-1; i>=0; i--){ 855 sum = 0.0; 856 nz2 = ai[i] - ai[i-1] - 1; 857 PETSC_Prefetch(v-nz2-1,0,1); 858 PETSC_Prefetch(vj-nz2-1,0,1); 859 PetscSparseDensePlusDot(sum,x,v,vj,nz); 860 sum = t[i] - sum; 861 x[i] = (1-omega)*x[i] + omega*sum*aidiag[i]; 862 nz = nz2; 863 v -= nz + 1; 864 vj -= nz + 1; 865 } 866 t = a->relax_work; 867 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 868 } 869 its--; 870 } 871 872 while (its--) { 873 /* 874 forward sweep: 875 for i=0,...,m-1: 876 sum[i] = (b[i] - U(i,:)x )/d[i]; 877 x[i] = (1-omega)x[i] + omega*sum[i]; 878 b = b - x[i]*U^T(i,:); 879 880 */ 881 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 882 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 883 884 for (i=0; i<m; i++){ 885 v = aa + ai[i] + 1; v1=v; 886 vj = aj + ai[i] + 1; vj1=vj; 887 nz = ai[i+1] - ai[i] - 1; nz1=nz; 888 sum = t[i]; 889 ierr = PetscLogFlops(4.0*nz-2);CHKERRQ(ierr); 890 while (nz1--) sum -= (*v1++)*x[*vj1++]; 891 x[i] = (1-omega)*x[i] + omega*sum*aidiag[i]; 892 while (nz--) t[*vj++] -= x[i]*(*v++); 893 } 894 } 895 896 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 897 /* 898 backward sweep: 899 b = b - x[i]*U^T(i,:), i=0,...,n-2 900 for i=m-1,...,0: 901 sum[i] = (b[i] - U(i,:)x )/d[i]; 902 x[i] = (1-omega)x[i] + omega*sum[i]; 903 */ 904 /* if there was a forward sweep done above then I thing the next two for loops are not needed */ 905 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 906 907 for (i=0; i<m-1; i++){ /* update rhs */ 908 v = aa + ai[i] + 1; 909 vj = aj + ai[i] + 1; 910 nz = ai[i+1] - ai[i] - 1; 911 ierr = PetscLogFlops(2.0*nz-1);CHKERRQ(ierr); 912 while (nz--) t[*vj++] -= x[i]*(*v++); 913 } 914 for (i=m-1; i>=0; i--){ 915 v = aa + ai[i] + 1; 916 vj = aj + ai[i] + 1; 917 nz = ai[i+1] - ai[i] - 1; 918 ierr = PetscLogFlops(2.0*nz-1);CHKERRQ(ierr); 919 sum = t[i]; 920 while (nz--) sum -= x[*vj++]*(*v++); 921 x[i] = (1-omega)*x[i] + omega*sum*aidiag[i]; 922 } 923 } 924 } 925 926 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 927 if (bb != xx) { 928 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 929 } 930 PetscFunctionReturn(0); 931 } 932 933 #undef __FUNCT__ 934 #define __FUNCT__ "MatMult_SeqSBAIJ_1_ushort" 935 PetscErrorCode MatMult_SeqSBAIJ_1_ushort(Mat A,Vec xx,Vec zz) 936 { 937 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 938 const PetscScalar *x; 939 PetscScalar *z,x1,sum; 940 const MatScalar *v; 941 PetscErrorCode ierr; 942 PetscInt mbs=a->mbs,i,j,nz; 943 const unsigned short *ib=a->jshort; 944 const PetscInt *ai=a->i; 945 946 PetscFunctionBegin; 947 ierr = VecSet(zz,0.0);CHKERRQ(ierr); 948 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 949 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 950 951 v = a->a; 952 for (i=0; i<mbs; i++) { 953 nz = ai[i+1] - ai[i]; /* length of i_th row of A */ 954 x1 = x[i]; 955 sum = v[0]*x1; /* diagonal term */ 956 for (j=1; j<nz; j++) { 957 z[ib[j]] += v[j] * x1; /* (strict lower triangular part of A)*x */ 958 sum += v[j] * x[ib[j]]; /* (strict upper triangular part of A)*x */ 959 } 960 z[i] += sum; 961 v += nz; 962 ib += nz; 963 } 964 965 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 966 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 967 ierr = PetscLogFlops(2.0*(2.0*a->nz - mbs) - mbs);CHKERRQ(ierr); 968 PetscFunctionReturn(0); 969 } 970 971 #undef __FUNCT__ 972 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 973 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 974 { 975 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 976 PetscErrorCode ierr; 977 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 978 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 979 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 980 MatScalar *aa = a->a,*ap; 981 982 PetscFunctionBegin; 983 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 984 985 if (m) rmax = ailen[0]; 986 for (i=1; i<mbs; i++) { 987 /* move each row back by the amount of empty slots (fshift) before it*/ 988 fshift += imax[i-1] - ailen[i-1]; 989 rmax = PetscMax(rmax,ailen[i]); 990 if (fshift) { 991 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 992 N = ailen[i]; 993 for (j=0; j<N; j++) { 994 ip[j-fshift] = ip[j]; 995 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 996 } 997 } 998 ai[i] = ai[i-1] + ailen[i-1]; 999 } 1000 if (mbs) { 1001 fshift += imax[mbs-1] - ailen[mbs-1]; 1002 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1003 } 1004 /* reset ilen and imax for each row */ 1005 for (i=0; i<mbs; i++) { 1006 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1007 } 1008 a->nz = ai[mbs]; 1009 1010 /* diagonals may have moved, reset it */ 1011 if (a->diag) { 1012 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1013 } 1014 if (fshift && a->nounused == -1) { 1015 SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1016 } 1017 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->rmap->N,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1018 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1019 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1020 a->reallocs = 0; 1021 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1022 a->idiagvalid = PETSC_FALSE; 1023 1024 if (A->cmap->n < 65536 && A->cmap->bs == 1) { 1025 if (!a->jshort) { 1026 ierr = PetscMalloc(a->i[A->rmap->n]*sizeof(unsigned short),&a->jshort);CHKERRQ(ierr); 1027 for (i=0; i<a->i[A->rmap->n]; i++) a->jshort[i] = a->j[i]; 1028 A->ops->mult = MatMult_SeqSBAIJ_1_ushort; 1029 A->ops->relax = MatRelax_SeqSBAIJ_1_ushort; 1030 } 1031 } 1032 PetscFunctionReturn(0); 1033 } 1034 1035 /* 1036 This function returns an array of flags which indicate the locations of contiguous 1037 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1038 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1039 Assume: sizes should be long enough to hold all the values. 1040 */ 1041 #undef __FUNCT__ 1042 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 1043 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1044 { 1045 PetscInt i,j,k,row; 1046 PetscTruth flg; 1047 1048 PetscFunctionBegin; 1049 for (i=0,j=0; i<n; j++) { 1050 row = idx[i]; 1051 if (row%bs!=0) { /* Not the begining of a block */ 1052 sizes[j] = 1; 1053 i++; 1054 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 1055 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1056 i++; 1057 } else { /* Begining of the block, so check if the complete block exists */ 1058 flg = PETSC_TRUE; 1059 for (k=1; k<bs; k++) { 1060 if (row+k != idx[i+k]) { /* break in the block */ 1061 flg = PETSC_FALSE; 1062 break; 1063 } 1064 } 1065 if (flg) { /* No break in the bs */ 1066 sizes[j] = bs; 1067 i+= bs; 1068 } else { 1069 sizes[j] = 1; 1070 i++; 1071 } 1072 } 1073 } 1074 *bs_max = j; 1075 PetscFunctionReturn(0); 1076 } 1077 1078 1079 /* Only add/insert a(i,j) with i<=j (blocks). 1080 Any a(i,j) with i>j input by user is ingored. 1081 */ 1082 1083 #undef __FUNCT__ 1084 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 1085 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1086 { 1087 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1088 PetscErrorCode ierr; 1089 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 1090 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 1091 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 1092 PetscInt ridx,cidx,bs2=a->bs2; 1093 MatScalar *ap,value,*aa=a->a,*bap; 1094 1095 PetscFunctionBegin; 1096 for (k=0; k<m; k++) { /* loop over added rows */ 1097 row = im[k]; /* row number */ 1098 brow = row/bs; /* block row number */ 1099 if (row < 0) continue; 1100 #if defined(PETSC_USE_DEBUG) 1101 if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 1102 #endif 1103 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 1104 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 1105 rmax = imax[brow]; /* maximum space allocated for this row */ 1106 nrow = ailen[brow]; /* actual length of this row */ 1107 low = 0; 1108 1109 for (l=0; l<n; l++) { /* loop over added columns */ 1110 if (in[l] < 0) continue; 1111 #if defined(PETSC_USE_DEBUG) 1112 if (in[l] >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->rmap->N-1); 1113 #endif 1114 col = in[l]; 1115 bcol = col/bs; /* block col number */ 1116 1117 if (brow > bcol) { 1118 if (a->ignore_ltriangular){ 1119 continue; /* ignore lower triangular values */ 1120 } else { 1121 SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 1122 } 1123 } 1124 1125 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 1126 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 1127 /* element value a(k,l) */ 1128 if (roworiented) { 1129 value = v[l + k*n]; 1130 } else { 1131 value = v[k + l*m]; 1132 } 1133 1134 /* move pointer bap to a(k,l) quickly and add/insert value */ 1135 if (col <= lastcol) low = 0; high = nrow; 1136 lastcol = col; 1137 while (high-low > 7) { 1138 t = (low+high)/2; 1139 if (rp[t] > bcol) high = t; 1140 else low = t; 1141 } 1142 for (i=low; i<high; i++) { 1143 if (rp[i] > bcol) break; 1144 if (rp[i] == bcol) { 1145 bap = ap + bs2*i + bs*cidx + ridx; 1146 if (is == ADD_VALUES) *bap += value; 1147 else *bap = value; 1148 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 1149 if (brow == bcol && ridx < cidx){ 1150 bap = ap + bs2*i + bs*ridx + cidx; 1151 if (is == ADD_VALUES) *bap += value; 1152 else *bap = value; 1153 } 1154 goto noinsert1; 1155 } 1156 } 1157 1158 if (nonew == 1) goto noinsert1; 1159 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1160 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1161 1162 N = nrow++ - 1; high++; 1163 /* shift up all the later entries in this row */ 1164 for (ii=N; ii>=i; ii--) { 1165 rp[ii+1] = rp[ii]; 1166 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1167 } 1168 if (N>=i) { 1169 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1170 } 1171 rp[i] = bcol; 1172 ap[bs2*i + bs*cidx + ridx] = value; 1173 noinsert1:; 1174 low = i; 1175 } 1176 } /* end of loop over added columns */ 1177 ailen[brow] = nrow; 1178 } /* end of loop over added rows */ 1179 PetscFunctionReturn(0); 1180 } 1181 1182 #undef __FUNCT__ 1183 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 1184 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,const MatFactorInfo *info) 1185 { 1186 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 1187 Mat outA; 1188 PetscErrorCode ierr; 1189 PetscTruth row_identity; 1190 1191 PetscFunctionBegin; 1192 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels=0 is supported for in-place icc"); 1193 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1194 if (!row_identity) SETERRQ(PETSC_ERR_SUP,"Matrix reordering is not supported"); 1195 if (inA->rmap->bs != 1) SETERRQ1(PETSC_ERR_SUP,"Matrix block size %D is not supported",inA->rmap->bs); /* Need to replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR()! */ 1196 1197 outA = inA; 1198 inA->factor = MAT_FACTOR_ICC; 1199 1200 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 1201 ierr = MatSeqSBAIJSetNumericFactorization(inA,row_identity);CHKERRQ(ierr); 1202 1203 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1204 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 1205 a->row = row; 1206 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1207 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 1208 a->col = row; 1209 1210 /* Create the invert permutation so that it can be used in MatCholeskyFactorNumeric() */ 1211 if (a->icol) {ierr = ISInvertPermutation(row,PETSC_DECIDE, &a->icol);CHKERRQ(ierr);} 1212 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1213 1214 if (!a->solve_work) { 1215 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1216 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1217 } 1218 1219 ierr = MatCholeskyFactorNumeric(outA,inA,info);CHKERRQ(ierr); 1220 PetscFunctionReturn(0); 1221 } 1222 1223 EXTERN_C_BEGIN 1224 #undef __FUNCT__ 1225 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1226 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices) 1227 { 1228 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1229 PetscInt i,nz,n; 1230 1231 PetscFunctionBegin; 1232 nz = baij->maxnz; 1233 n = mat->cmap->n; 1234 for (i=0; i<nz; i++) { 1235 baij->j[i] = indices[i]; 1236 } 1237 baij->nz = nz; 1238 for (i=0; i<n; i++) { 1239 baij->ilen[i] = baij->imax[i]; 1240 } 1241 PetscFunctionReturn(0); 1242 } 1243 EXTERN_C_END 1244 1245 #undef __FUNCT__ 1246 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1247 /*@ 1248 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1249 in the matrix. 1250 1251 Input Parameters: 1252 + mat - the SeqSBAIJ matrix 1253 - indices - the column indices 1254 1255 Level: advanced 1256 1257 Notes: 1258 This can be called if you have precomputed the nonzero structure of the 1259 matrix and want to provide it to the matrix object to improve the performance 1260 of the MatSetValues() operation. 1261 1262 You MUST have set the correct numbers of nonzeros per row in the call to 1263 MatCreateSeqSBAIJ(), and the columns indices MUST be sorted. 1264 1265 MUST be called before any calls to MatSetValues() 1266 1267 .seealso: MatCreateSeqSBAIJ 1268 @*/ 1269 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1270 { 1271 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1272 1273 PetscFunctionBegin; 1274 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1275 PetscValidPointer(indices,2); 1276 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1277 if (f) { 1278 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1279 } else { 1280 SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices"); 1281 } 1282 PetscFunctionReturn(0); 1283 } 1284 1285 #undef __FUNCT__ 1286 #define __FUNCT__ "MatCopy_SeqSBAIJ" 1287 PetscErrorCode MatCopy_SeqSBAIJ(Mat A,Mat B,MatStructure str) 1288 { 1289 PetscErrorCode ierr; 1290 1291 PetscFunctionBegin; 1292 /* If the two matrices have the same copy implementation, use fast copy. */ 1293 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1294 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1295 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1296 1297 if (a->i[A->rmap->N] != b->i[B->rmap->N]) { 1298 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1299 } 1300 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr); 1301 } else { 1302 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1303 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1304 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1305 } 1306 PetscFunctionReturn(0); 1307 } 1308 1309 #undef __FUNCT__ 1310 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1311 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A) 1312 { 1313 PetscErrorCode ierr; 1314 1315 PetscFunctionBegin; 1316 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 1317 PetscFunctionReturn(0); 1318 } 1319 1320 #undef __FUNCT__ 1321 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1322 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1323 { 1324 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1325 PetscFunctionBegin; 1326 *array = a->a; 1327 PetscFunctionReturn(0); 1328 } 1329 1330 #undef __FUNCT__ 1331 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1332 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1333 { 1334 PetscFunctionBegin; 1335 PetscFunctionReturn(0); 1336 } 1337 1338 #include "petscblaslapack.h" 1339 #undef __FUNCT__ 1340 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1341 PetscErrorCode MatAXPY_SeqSBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1342 { 1343 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1344 PetscErrorCode ierr; 1345 PetscInt i,bs=Y->rmap->bs,bs2,j; 1346 PetscBLASInt one = 1,bnz = PetscBLASIntCast(x->nz); 1347 1348 PetscFunctionBegin; 1349 if (str == SAME_NONZERO_PATTERN) { 1350 PetscScalar alpha = a; 1351 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1352 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1353 if (y->xtoy && y->XtoY != X) { 1354 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1355 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1356 } 1357 if (!y->xtoy) { /* get xtoy */ 1358 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1359 y->XtoY = X; 1360 } 1361 bs2 = bs*bs; 1362 for (i=0; i<x->nz; i++) { 1363 j = 0; 1364 while (j < bs2){ 1365 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 1366 j++; 1367 } 1368 } 1369 ierr = PetscInfo3(Y,"ratio of nnz_s(X)/nnz_s(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 1370 } else { 1371 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1372 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1373 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1374 } 1375 PetscFunctionReturn(0); 1376 } 1377 1378 #undef __FUNCT__ 1379 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1380 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1381 { 1382 PetscFunctionBegin; 1383 *flg = PETSC_TRUE; 1384 PetscFunctionReturn(0); 1385 } 1386 1387 #undef __FUNCT__ 1388 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1389 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1390 { 1391 PetscFunctionBegin; 1392 *flg = PETSC_TRUE; 1393 PetscFunctionReturn(0); 1394 } 1395 1396 #undef __FUNCT__ 1397 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1398 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1399 { 1400 PetscFunctionBegin; 1401 *flg = PETSC_FALSE; 1402 PetscFunctionReturn(0); 1403 } 1404 1405 #undef __FUNCT__ 1406 #define __FUNCT__ "MatRealPart_SeqSBAIJ" 1407 PetscErrorCode MatRealPart_SeqSBAIJ(Mat A) 1408 { 1409 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1410 PetscInt i,nz = a->bs2*a->i[a->mbs]; 1411 MatScalar *aa = a->a; 1412 1413 PetscFunctionBegin; 1414 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 1415 PetscFunctionReturn(0); 1416 } 1417 1418 #undef __FUNCT__ 1419 #define __FUNCT__ "MatImaginaryPart_SeqSBAIJ" 1420 PetscErrorCode MatImaginaryPart_SeqSBAIJ(Mat A) 1421 { 1422 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1423 PetscInt i,nz = a->bs2*a->i[a->mbs]; 1424 MatScalar *aa = a->a; 1425 1426 PetscFunctionBegin; 1427 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 1428 PetscFunctionReturn(0); 1429 } 1430 1431 /* -------------------------------------------------------------------*/ 1432 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1433 MatGetRow_SeqSBAIJ, 1434 MatRestoreRow_SeqSBAIJ, 1435 MatMult_SeqSBAIJ_N, 1436 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1437 MatMult_SeqSBAIJ_N, /* transpose versions are same as non-transpose versions */ 1438 MatMultAdd_SeqSBAIJ_N, 1439 0, 1440 0, 1441 0, 1442 /*10*/ 0, 1443 0, 1444 MatCholeskyFactor_SeqSBAIJ, 1445 MatRelax_SeqSBAIJ, 1446 MatTranspose_SeqSBAIJ, 1447 /*15*/ MatGetInfo_SeqSBAIJ, 1448 MatEqual_SeqSBAIJ, 1449 MatGetDiagonal_SeqSBAIJ, 1450 MatDiagonalScale_SeqSBAIJ, 1451 MatNorm_SeqSBAIJ, 1452 /*20*/ 0, 1453 MatAssemblyEnd_SeqSBAIJ, 1454 MatSetOption_SeqSBAIJ, 1455 MatZeroEntries_SeqSBAIJ, 1456 /*24*/ 0, 1457 0, 1458 0, 1459 0, 1460 0, 1461 /*29*/ MatSetUpPreallocation_SeqSBAIJ, 1462 0, 1463 0, 1464 MatGetArray_SeqSBAIJ, 1465 MatRestoreArray_SeqSBAIJ, 1466 /*34*/ MatDuplicate_SeqSBAIJ, 1467 0, 1468 0, 1469 0, 1470 MatICCFactor_SeqSBAIJ, 1471 /*39*/ MatAXPY_SeqSBAIJ, 1472 MatGetSubMatrices_SeqSBAIJ, 1473 MatIncreaseOverlap_SeqSBAIJ, 1474 MatGetValues_SeqSBAIJ, 1475 MatCopy_SeqSBAIJ, 1476 /*44*/ 0, 1477 MatScale_SeqSBAIJ, 1478 0, 1479 0, 1480 0, 1481 /*49*/ 0, 1482 MatGetRowIJ_SeqSBAIJ, 1483 MatRestoreRowIJ_SeqSBAIJ, 1484 0, 1485 0, 1486 /*54*/ 0, 1487 0, 1488 0, 1489 0, 1490 MatSetValuesBlocked_SeqSBAIJ, 1491 /*59*/ MatGetSubMatrix_SeqSBAIJ, 1492 0, 1493 0, 1494 0, 1495 0, 1496 /*64*/ 0, 1497 0, 1498 0, 1499 0, 1500 0, 1501 /*69*/ MatGetRowMaxAbs_SeqSBAIJ, 1502 0, 1503 0, 1504 0, 1505 0, 1506 /*74*/ 0, 1507 0, 1508 0, 1509 0, 1510 0, 1511 /*79*/ 0, 1512 0, 1513 0, 1514 #if !defined(PETSC_USE_COMPLEX) 1515 MatGetInertia_SeqSBAIJ, 1516 #else 1517 0, 1518 #endif 1519 MatLoad_SeqSBAIJ, 1520 /*84*/ MatIsSymmetric_SeqSBAIJ, 1521 MatIsHermitian_SeqSBAIJ, 1522 MatIsStructurallySymmetric_SeqSBAIJ, 1523 0, 1524 0, 1525 /*89*/ 0, 1526 0, 1527 0, 1528 0, 1529 0, 1530 /*94*/ 0, 1531 0, 1532 0, 1533 0, 1534 0, 1535 /*99*/ 0, 1536 0, 1537 0, 1538 0, 1539 0, 1540 /*104*/0, 1541 MatRealPart_SeqSBAIJ, 1542 MatImaginaryPart_SeqSBAIJ, 1543 MatGetRowUpperTriangular_SeqSBAIJ, 1544 MatRestoreRowUpperTriangular_SeqSBAIJ, 1545 /*109*/0, 1546 0, 1547 0, 1548 0, 1549 MatMissingDiagonal_SeqSBAIJ 1550 }; 1551 1552 EXTERN_C_BEGIN 1553 #undef __FUNCT__ 1554 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1555 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqSBAIJ(Mat mat) 1556 { 1557 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1558 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 1559 PetscErrorCode ierr; 1560 1561 PetscFunctionBegin; 1562 if (aij->nonew != 1) { 1563 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1564 } 1565 1566 /* allocate space for values if not already there */ 1567 if (!aij->saved_values) { 1568 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1569 } 1570 1571 /* copy values over */ 1572 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1573 PetscFunctionReturn(0); 1574 } 1575 EXTERN_C_END 1576 1577 EXTERN_C_BEGIN 1578 #undef __FUNCT__ 1579 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1580 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqSBAIJ(Mat mat) 1581 { 1582 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1583 PetscErrorCode ierr; 1584 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 1585 1586 PetscFunctionBegin; 1587 if (aij->nonew != 1) { 1588 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1589 } 1590 if (!aij->saved_values) { 1591 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1592 } 1593 1594 /* copy values over */ 1595 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1596 PetscFunctionReturn(0); 1597 } 1598 EXTERN_C_END 1599 1600 EXTERN_C_BEGIN 1601 #undef __FUNCT__ 1602 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1603 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 1604 { 1605 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1606 PetscErrorCode ierr; 1607 PetscInt i,mbs,bs2, newbs = PetscAbs(bs); 1608 PetscTruth skipallocation = PETSC_FALSE,flg = PETSC_FALSE; 1609 1610 PetscFunctionBegin; 1611 B->preallocated = PETSC_TRUE; 1612 if (bs < 0) { 1613 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr); 1614 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqSBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 1615 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1616 bs = PetscAbs(bs); 1617 } 1618 if (nnz && newbs != bs) { 1619 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 1620 } 1621 bs = newbs; 1622 1623 ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 1624 ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 1625 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 1626 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 1627 1628 mbs = B->rmap->N/bs; 1629 bs2 = bs*bs; 1630 1631 if (mbs*bs != B->rmap->N) { 1632 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1633 } 1634 1635 if (nz == MAT_SKIP_ALLOCATION) { 1636 skipallocation = PETSC_TRUE; 1637 nz = 0; 1638 } 1639 1640 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1641 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 1642 if (nnz) { 1643 for (i=0; i<mbs; i++) { 1644 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 1645 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs); 1646 } 1647 } 1648 1649 B->ops->mult = MatMult_SeqSBAIJ_N; 1650 B->ops->multadd = MatMultAdd_SeqSBAIJ_N; 1651 B->ops->multtranspose = MatMult_SeqSBAIJ_N; 1652 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_N; 1653 ierr = PetscOptionsGetTruth(((PetscObject)B)->prefix,"-mat_no_unroll",&flg,PETSC_NULL);CHKERRQ(ierr); 1654 if (!flg) { 1655 switch (bs) { 1656 case 1: 1657 B->ops->mult = MatMult_SeqSBAIJ_1; 1658 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1659 B->ops->multtranspose = MatMult_SeqSBAIJ_1; 1660 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_1; 1661 break; 1662 case 2: 1663 B->ops->mult = MatMult_SeqSBAIJ_2; 1664 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1665 B->ops->multtranspose = MatMult_SeqSBAIJ_2; 1666 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_2; 1667 break; 1668 case 3: 1669 B->ops->mult = MatMult_SeqSBAIJ_3; 1670 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1671 B->ops->multtranspose = MatMult_SeqSBAIJ_3; 1672 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_3; 1673 break; 1674 case 4: 1675 B->ops->mult = MatMult_SeqSBAIJ_4; 1676 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1677 B->ops->multtranspose = MatMult_SeqSBAIJ_4; 1678 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_4; 1679 break; 1680 case 5: 1681 B->ops->mult = MatMult_SeqSBAIJ_5; 1682 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1683 B->ops->multtranspose = MatMult_SeqSBAIJ_5; 1684 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_5; 1685 break; 1686 case 6: 1687 B->ops->mult = MatMult_SeqSBAIJ_6; 1688 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1689 B->ops->multtranspose = MatMult_SeqSBAIJ_6; 1690 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_6; 1691 break; 1692 case 7: 1693 B->ops->mult = MatMult_SeqSBAIJ_7; 1694 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1695 B->ops->multtranspose = MatMult_SeqSBAIJ_7; 1696 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_7; 1697 break; 1698 } 1699 } 1700 1701 b->mbs = mbs; 1702 b->nbs = mbs; 1703 if (!skipallocation) { 1704 if (!b->imax) { 1705 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 1706 ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 1707 } 1708 if (!nnz) { 1709 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1710 else if (nz <= 0) nz = 1; 1711 for (i=0; i<mbs; i++) { 1712 b->imax[i] = nz; 1713 } 1714 nz = nz*mbs; /* total nz */ 1715 } else { 1716 nz = 0; 1717 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1718 } 1719 /* b->ilen will count nonzeros in each block row so far. */ 1720 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1721 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1722 1723 /* allocate the matrix space */ 1724 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 1725 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 1726 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 1727 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1728 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1729 b->singlemalloc = PETSC_TRUE; 1730 1731 /* pointer to beginning of each row */ 1732 b->i[0] = 0; 1733 for (i=1; i<mbs+1; i++) { 1734 b->i[i] = b->i[i-1] + b->imax[i-1]; 1735 } 1736 b->free_a = PETSC_TRUE; 1737 b->free_ij = PETSC_TRUE; 1738 } else { 1739 b->free_a = PETSC_FALSE; 1740 b->free_ij = PETSC_FALSE; 1741 } 1742 1743 B->rmap->bs = bs; 1744 b->bs2 = bs2; 1745 b->nz = 0; 1746 b->maxnz = nz*bs2; 1747 1748 b->inew = 0; 1749 b->jnew = 0; 1750 b->anew = 0; 1751 b->a2anew = 0; 1752 b->permute = PETSC_FALSE; 1753 PetscFunctionReturn(0); 1754 } 1755 EXTERN_C_END 1756 1757 #undef __FUNCT__ 1758 #define __FUNCT__ "MatSeqBAIJSetNumericFactorization" 1759 /* 1760 This is used to set the numeric factorization for both Cholesky and ICC symbolic factorization 1761 */ 1762 PetscErrorCode MatSeqSBAIJSetNumericFactorization(Mat B,PetscTruth natural) 1763 { 1764 PetscErrorCode ierr; 1765 PetscTruth flg = PETSC_FALSE; 1766 PetscInt bs = B->rmap->bs; 1767 1768 PetscFunctionBegin; 1769 ierr = PetscOptionsGetTruth(((PetscObject)B)->prefix,"-mat_no_unroll",&flg,PETSC_NULL);CHKERRQ(ierr); 1770 if (flg) bs = 8; 1771 1772 if (!natural) { 1773 switch (bs) { 1774 case 1: 1775 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1776 break; 1777 case 2: 1778 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1779 break; 1780 case 3: 1781 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1782 break; 1783 case 4: 1784 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1785 break; 1786 case 5: 1787 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1788 break; 1789 case 6: 1790 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1791 break; 1792 case 7: 1793 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1794 break; 1795 default: 1796 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N; 1797 break; 1798 } 1799 } else { 1800 switch (bs) { 1801 case 1: 1802 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 1803 break; 1804 case 2: 1805 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1806 break; 1807 case 3: 1808 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1809 break; 1810 case 4: 1811 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1812 break; 1813 case 5: 1814 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1815 break; 1816 case 6: 1817 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1818 break; 1819 case 7: 1820 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1821 break; 1822 default: 1823 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 1824 break; 1825 } 1826 } 1827 PetscFunctionReturn(0); 1828 } 1829 1830 EXTERN_C_BEGIN 1831 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 1832 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqBAIJ(Mat, MatType,MatReuse,Mat*); 1833 EXTERN_C_END 1834 1835 1836 EXTERN_C_BEGIN 1837 #undef __FUNCT__ 1838 #define __FUNCT__ "MatGetFactor_seqsbaij_petsc" 1839 PetscErrorCode MatGetFactor_seqsbaij_petsc(Mat A,MatFactorType ftype,Mat *B) 1840 { 1841 PetscInt n = A->rmap->n; 1842 PetscErrorCode ierr; 1843 1844 PetscFunctionBegin; 1845 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 1846 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 1847 if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 1848 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 1849 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1850 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqSBAIJ; 1851 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqSBAIJ; 1852 } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported"); 1853 (*B)->factor = ftype; 1854 PetscFunctionReturn(0); 1855 } 1856 EXTERN_C_END 1857 1858 EXTERN_C_BEGIN 1859 #undef __FUNCT__ 1860 #define __FUNCT__ "MatGetFactorAvailable_seqsbaij_petsc" 1861 PetscErrorCode MatGetFactorAvailable_seqsbaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg) 1862 { 1863 PetscFunctionBegin; 1864 if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 1865 *flg = PETSC_TRUE; 1866 } else { 1867 *flg = PETSC_FALSE; 1868 } 1869 PetscFunctionReturn(0); 1870 } 1871 EXTERN_C_END 1872 1873 EXTERN_C_BEGIN 1874 #if defined(PETSC_HAVE_MUMPS) 1875 extern PetscErrorCode MatGetFactor_seqsbaij_mumps(Mat,MatFactorType,Mat*); 1876 #endif 1877 #if defined(PETSC_HAVE_SPOOLES) 1878 extern PetscErrorCode MatGetFactor_seqsbaij_spooles(Mat,MatFactorType,Mat*); 1879 #endif 1880 #if defined(PETSC_HAVE_PASTIX) 1881 extern PetscErrorCode MatGetFactor_seqsbaij_pastix(Mat,MatFactorType,Mat*); 1882 #endif 1883 EXTERN_C_END 1884 1885 /*MC 1886 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1887 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1888 1889 Options Database Keys: 1890 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1891 1892 Level: beginner 1893 1894 .seealso: MatCreateSeqSBAIJ 1895 M*/ 1896 1897 EXTERN_C_BEGIN 1898 #undef __FUNCT__ 1899 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1900 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqSBAIJ(Mat B) 1901 { 1902 Mat_SeqSBAIJ *b; 1903 PetscErrorCode ierr; 1904 PetscMPIInt size; 1905 PetscTruth no_unroll = PETSC_FALSE,no_inode = PETSC_FALSE; 1906 1907 PetscFunctionBegin; 1908 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 1909 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1910 1911 ierr = PetscNewLog(B,Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1912 B->data = (void*)b; 1913 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1914 B->ops->destroy = MatDestroy_SeqSBAIJ; 1915 B->ops->view = MatView_SeqSBAIJ; 1916 B->mapping = 0; 1917 b->row = 0; 1918 b->icol = 0; 1919 b->reallocs = 0; 1920 b->saved_values = 0; 1921 b->inode.limit = 5; 1922 b->inode.max_limit = 5; 1923 1924 b->roworiented = PETSC_TRUE; 1925 b->nonew = 0; 1926 b->diag = 0; 1927 b->solve_work = 0; 1928 b->mult_work = 0; 1929 B->spptr = 0; 1930 b->keepnonzeropattern = PETSC_FALSE; 1931 b->xtoy = 0; 1932 b->XtoY = 0; 1933 1934 b->inew = 0; 1935 b->jnew = 0; 1936 b->anew = 0; 1937 b->a2anew = 0; 1938 b->permute = PETSC_FALSE; 1939 1940 b->ignore_ltriangular = PETSC_FALSE; 1941 ierr = PetscOptionsGetTruth(((PetscObject)B)->prefix,"-mat_ignore_lower_triangular",&b->ignore_ltriangular,PETSC_NULL);CHKERRQ(ierr); 1942 1943 b->getrow_utriangular = PETSC_FALSE; 1944 ierr = PetscOptionsGetTruth(((PetscObject)B)->prefix,"-mat_getrow_uppertriangular",&b->getrow_utriangular,PETSC_NULL);CHKERRQ(ierr); 1945 1946 #if defined(PETSC_HAVE_PASTIX) 1947 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqsbaij_pastix_C", 1948 "MatGetFactor_seqsbaij_pastix", 1949 MatGetFactor_seqsbaij_pastix);CHKERRQ(ierr); 1950 #endif 1951 #if defined(PETSC_HAVE_SPOOLES) 1952 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqsbaij_spooles_C", 1953 "MatGetFactor_seqsbaij_spooles", 1954 MatGetFactor_seqsbaij_spooles);CHKERRQ(ierr); 1955 #endif 1956 #if defined(PETSC_HAVE_MUMPS) 1957 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqsbaij_mumps_C", 1958 "MatGetFactor_seqsbaij_mumps", 1959 MatGetFactor_seqsbaij_mumps);CHKERRQ(ierr); 1960 #endif 1961 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_seqsbaij_petsc_C", 1962 "MatGetFactorAvailable_seqsbaij_petsc", 1963 MatGetFactorAvailable_seqsbaij_petsc);CHKERRQ(ierr); 1964 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqsbaij_petsc_C", 1965 "MatGetFactor_seqsbaij_petsc", 1966 MatGetFactor_seqsbaij_petsc);CHKERRQ(ierr); 1967 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1968 "MatStoreValues_SeqSBAIJ", 1969 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1970 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1971 "MatRetrieveValues_SeqSBAIJ", 1972 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1973 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1974 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1975 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1976 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1977 "MatConvert_SeqSBAIJ_SeqAIJ", 1978 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1979 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 1980 "MatConvert_SeqSBAIJ_SeqBAIJ", 1981 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 1982 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1983 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1984 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1985 1986 B->symmetric = PETSC_TRUE; 1987 B->structurally_symmetric = PETSC_TRUE; 1988 B->symmetric_set = PETSC_TRUE; 1989 B->structurally_symmetric_set = PETSC_TRUE; 1990 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSBAIJ);CHKERRQ(ierr); 1991 1992 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for SEQSBAIJ matrix","Mat");CHKERRQ(ierr); 1993 ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for inodes (slower)",PETSC_NULL,no_unroll,&no_unroll,PETSC_NULL);CHKERRQ(ierr); 1994 if (no_unroll) {ierr = PetscInfo(B,"Not using Inode routines due to -mat_no_unroll\n");CHKERRQ(ierr);} 1995 ierr = PetscOptionsTruth("-mat_no_inode","Do not optimize for inodes (slower)",PETSC_NULL,no_inode,&no_inode,PETSC_NULL);CHKERRQ(ierr); 1996 if (no_inode) {ierr = PetscInfo(B,"Not using Inode routines due to -mat_no_inode\n");CHKERRQ(ierr);} 1997 ierr = PetscOptionsInt("-mat_inode_limit","Do not use inodes larger then this value",PETSC_NULL,b->inode.limit,&b->inode.limit,PETSC_NULL);CHKERRQ(ierr); 1998 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1999 b->inode.use = (PetscTruth)(!(no_unroll || no_inode)); 2000 if (b->inode.limit > b->inode.max_limit) b->inode.limit = b->inode.max_limit; 2001 2002 PetscFunctionReturn(0); 2003 } 2004 EXTERN_C_END 2005 2006 #undef __FUNCT__ 2007 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 2008 /*@C 2009 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 2010 compressed row) format. For good matrix assembly performance the 2011 user should preallocate the matrix storage by setting the parameter nz 2012 (or the array nnz). By setting these parameters accurately, performance 2013 during matrix assembly can be increased by more than a factor of 50. 2014 2015 Collective on Mat 2016 2017 Input Parameters: 2018 + A - the symmetric matrix 2019 . bs - size of block 2020 . nz - number of block nonzeros per block row (same for all rows) 2021 - nnz - array containing the number of block nonzeros in the upper triangular plus 2022 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 2023 2024 Options Database Keys: 2025 . -mat_no_unroll - uses code that does not unroll the loops in the 2026 block calculations (much slower) 2027 . -mat_block_size - size of the blocks to use (only works if a negative bs is passed in 2028 2029 Level: intermediate 2030 2031 Notes: 2032 Specify the preallocated storage with either nz or nnz (not both). 2033 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2034 allocation. For additional details, see the users manual chapter on 2035 matrices. 2036 2037 You can call MatGetInfo() to get information on how effective the preallocation was; 2038 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2039 You can also run with the option -info and look for messages with the string 2040 malloc in them to see if additional memory allocation was needed. 2041 2042 If the nnz parameter is given then the nz parameter is ignored 2043 2044 2045 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 2046 @*/ 2047 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 2048 { 2049 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 2050 2051 PetscFunctionBegin; 2052 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2053 if (f) { 2054 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2055 } 2056 PetscFunctionReturn(0); 2057 } 2058 2059 #undef __FUNCT__ 2060 #define __FUNCT__ "MatCreateSeqSBAIJ" 2061 /*@C 2062 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 2063 compressed row) format. For good matrix assembly performance the 2064 user should preallocate the matrix storage by setting the parameter nz 2065 (or the array nnz). By setting these parameters accurately, performance 2066 during matrix assembly can be increased by more than a factor of 50. 2067 2068 Collective on MPI_Comm 2069 2070 Input Parameters: 2071 + comm - MPI communicator, set to PETSC_COMM_SELF 2072 . bs - size of block 2073 . m - number of rows, or number of columns 2074 . nz - number of block nonzeros per block row (same for all rows) 2075 - nnz - array containing the number of block nonzeros in the upper triangular plus 2076 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 2077 2078 Output Parameter: 2079 . A - the symmetric matrix 2080 2081 Options Database Keys: 2082 . -mat_no_unroll - uses code that does not unroll the loops in the 2083 block calculations (much slower) 2084 . -mat_block_size - size of the blocks to use 2085 2086 Level: intermediate 2087 2088 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2089 MatXXXXSetPreallocation() paradgm instead of this routine directly. 2090 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2091 2092 Notes: 2093 The number of rows and columns must be divisible by blocksize. 2094 This matrix type does not support complex Hermitian operation. 2095 2096 Specify the preallocated storage with either nz or nnz (not both). 2097 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2098 allocation. For additional details, see the users manual chapter on 2099 matrices. 2100 2101 If the nnz parameter is given then the nz parameter is ignored 2102 2103 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 2104 @*/ 2105 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 2106 { 2107 PetscErrorCode ierr; 2108 2109 PetscFunctionBegin; 2110 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2111 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 2112 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2113 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 2114 PetscFunctionReturn(0); 2115 } 2116 2117 #undef __FUNCT__ 2118 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 2119 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2120 { 2121 Mat C; 2122 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 2123 PetscErrorCode ierr; 2124 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 2125 2126 PetscFunctionBegin; 2127 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2128 2129 *B = 0; 2130 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 2131 ierr = MatSetSizes(C,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 2132 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 2133 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2134 c = (Mat_SeqSBAIJ*)C->data; 2135 2136 C->preallocated = PETSC_TRUE; 2137 C->factor = A->factor; 2138 c->row = 0; 2139 c->icol = 0; 2140 c->saved_values = 0; 2141 c->keepnonzeropattern = a->keepnonzeropattern; 2142 C->assembled = PETSC_TRUE; 2143 2144 ierr = PetscMapCopy(((PetscObject)A)->comm,A->rmap,C->rmap);CHKERRQ(ierr); 2145 ierr = PetscMapCopy(((PetscObject)A)->comm,A->cmap,C->cmap);CHKERRQ(ierr); 2146 c->bs2 = a->bs2; 2147 c->mbs = a->mbs; 2148 c->nbs = a->nbs; 2149 2150 ierr = PetscMalloc2((mbs+1),PetscInt,&c->imax,(mbs+1),PetscInt,&c->ilen);CHKERRQ(ierr); 2151 for (i=0; i<mbs; i++) { 2152 c->imax[i] = a->imax[i]; 2153 c->ilen[i] = a->ilen[i]; 2154 } 2155 2156 /* allocate the matrix space */ 2157 ierr = PetscMalloc3(bs2*nz,MatScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 2158 c->singlemalloc = PETSC_TRUE; 2159 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2160 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)));CHKERRQ(ierr); 2161 if (mbs > 0) { 2162 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2163 if (cpvalues == MAT_COPY_VALUES) { 2164 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2165 } else { 2166 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2167 } 2168 if (a->jshort) { 2169 ierr = PetscMalloc(nz*sizeof(unsigned short),&c->jshort);CHKERRQ(ierr); 2170 ierr = PetscMemcpy(c->jshort,a->jshort,nz*sizeof(unsigned short));CHKERRQ(ierr); 2171 } 2172 } 2173 2174 c->roworiented = a->roworiented; 2175 c->nonew = a->nonew; 2176 2177 if (a->diag) { 2178 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2179 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2180 for (i=0; i<mbs; i++) { 2181 c->diag[i] = a->diag[i]; 2182 } 2183 } else c->diag = 0; 2184 c->nz = a->nz; 2185 c->maxnz = a->maxnz; 2186 c->solve_work = 0; 2187 c->mult_work = 0; 2188 c->free_a = PETSC_TRUE; 2189 c->free_ij = PETSC_TRUE; 2190 *B = C; 2191 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 2192 PetscFunctionReturn(0); 2193 } 2194 2195 #undef __FUNCT__ 2196 #define __FUNCT__ "MatLoad_SeqSBAIJ" 2197 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer, const MatType type,Mat *A) 2198 { 2199 Mat_SeqSBAIJ *a; 2200 Mat B; 2201 PetscErrorCode ierr; 2202 int fd; 2203 PetscMPIInt size; 2204 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2205 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 2206 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2207 PetscInt *masked,nmask,tmp,bs2,ishift; 2208 PetscScalar *aa; 2209 MPI_Comm comm = ((PetscObject)viewer)->comm; 2210 2211 PetscFunctionBegin; 2212 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2213 bs2 = bs*bs; 2214 2215 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2216 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2217 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2218 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2219 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2220 M = header[1]; N = header[2]; nz = header[3]; 2221 2222 if (header[3] < 0) { 2223 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 2224 } 2225 2226 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2227 2228 /* 2229 This code adds extra rows to make sure the number of rows is 2230 divisible by the blocksize 2231 */ 2232 mbs = M/bs; 2233 extra_rows = bs - M + bs*(mbs); 2234 if (extra_rows == bs) extra_rows = 0; 2235 else mbs++; 2236 if (extra_rows) { 2237 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2238 } 2239 2240 /* read in row lengths */ 2241 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2242 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2243 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2244 2245 /* read in column indices */ 2246 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2247 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2248 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2249 2250 /* loop over row lengths determining block row lengths */ 2251 ierr = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr); 2252 ierr = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2253 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2254 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2255 masked = mask + mbs; 2256 rowcount = 0; nzcount = 0; 2257 for (i=0; i<mbs; i++) { 2258 nmask = 0; 2259 for (j=0; j<bs; j++) { 2260 kmax = rowlengths[rowcount]; 2261 for (k=0; k<kmax; k++) { 2262 tmp = jj[nzcount++]/bs; /* block col. index */ 2263 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 2264 } 2265 rowcount++; 2266 } 2267 s_browlengths[i] += nmask; 2268 2269 /* zero out the mask elements we set */ 2270 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2271 } 2272 2273 /* create our matrix */ 2274 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 2275 ierr = MatSetSizes(B,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2276 ierr = MatSetType(B,type);CHKERRQ(ierr); 2277 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,s_browlengths);CHKERRQ(ierr); 2278 a = (Mat_SeqSBAIJ*)B->data; 2279 2280 /* set matrix "i" values */ 2281 a->i[0] = 0; 2282 for (i=1; i<= mbs; i++) { 2283 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 2284 a->ilen[i-1] = s_browlengths[i-1]; 2285 } 2286 a->nz = a->i[mbs]; 2287 2288 /* read in nonzero values */ 2289 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 2290 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 2291 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2292 2293 /* set "a" and "j" values into matrix */ 2294 nzcount = 0; jcount = 0; 2295 for (i=0; i<mbs; i++) { 2296 nzcountb = nzcount; 2297 nmask = 0; 2298 for (j=0; j<bs; j++) { 2299 kmax = rowlengths[i*bs+j]; 2300 for (k=0; k<kmax; k++) { 2301 tmp = jj[nzcount++]/bs; /* block col. index */ 2302 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 2303 } 2304 } 2305 /* sort the masked values */ 2306 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2307 2308 /* set "j" values into matrix */ 2309 maskcount = 1; 2310 for (j=0; j<nmask; j++) { 2311 a->j[jcount++] = masked[j]; 2312 mask[masked[j]] = maskcount++; 2313 } 2314 2315 /* set "a" values into matrix */ 2316 ishift = bs2*a->i[i]; 2317 for (j=0; j<bs; j++) { 2318 kmax = rowlengths[i*bs+j]; 2319 for (k=0; k<kmax; k++) { 2320 tmp = jj[nzcountb]/bs ; /* block col. index */ 2321 if (tmp >= i){ 2322 block = mask[tmp] - 1; 2323 point = jj[nzcountb] - bs*tmp; 2324 idx = ishift + bs2*block + j + bs*point; 2325 a->a[idx] = aa[nzcountb]; 2326 } 2327 nzcountb++; 2328 } 2329 } 2330 /* zero out the mask elements we set */ 2331 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2332 } 2333 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2334 2335 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2336 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 2337 ierr = PetscFree(aa);CHKERRQ(ierr); 2338 ierr = PetscFree(jj);CHKERRQ(ierr); 2339 ierr = PetscFree(mask);CHKERRQ(ierr); 2340 2341 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2342 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2343 ierr = MatView_Private(B);CHKERRQ(ierr); 2344 *A = B; 2345 PetscFunctionReturn(0); 2346 } 2347 2348 #undef __FUNCT__ 2349 #define __FUNCT__ "MatRelax_SeqSBAIJ" 2350 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2351 { 2352 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 2353 const MatScalar *aa=a->a,*v,*v1,*aidiag; 2354 PetscScalar *x,*b,*t,sum; 2355 MatScalar tmp; 2356 PetscErrorCode ierr; 2357 PetscInt m=a->mbs,bs=A->rmap->bs,j; 2358 const PetscInt *ai=a->i,*aj=a->j,*vj,*vj1; 2359 PetscInt nz,nz1,i; 2360 2361 PetscFunctionBegin; 2362 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 2363 2364 its = its*lits; 2365 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2366 2367 if (bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2368 2369 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2370 if (xx != bb) { 2371 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2372 } else { 2373 b = x; 2374 } 2375 2376 if (!a->idiagvalid) { 2377 if (!a->idiag) { 2378 ierr = PetscMalloc(m*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 2379 } 2380 for (i=0; i<a->mbs; i++) a->idiag[i] = 1.0/a->a[a->i[i]]; 2381 a->idiagvalid = PETSC_TRUE; 2382 } 2383 2384 if (!a->relax_work) { 2385 ierr = PetscMalloc(m*sizeof(PetscScalar),&a->relax_work);CHKERRQ(ierr); 2386 } 2387 t = a->relax_work; 2388 2389 aidiag = a->idiag; 2390 if (flag & SOR_ZERO_INITIAL_GUESS) { 2391 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2392 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 2393 2394 v = aa + 1; 2395 vj = aj + 1; 2396 for (i=0; i<m; i++){ 2397 nz = ai[i+1] - ai[i] - 1; 2398 tmp = - (x[i] = omega*t[i]*aidiag[i]); 2399 for (j=0; j<nz; j++) { 2400 t[vj[j]] += tmp*v[j]; 2401 } 2402 v += nz + 1; 2403 vj += nz + 1; 2404 } 2405 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 2406 } 2407 2408 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2409 int nz2; 2410 if (!(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)){ 2411 t = b; 2412 } 2413 2414 v = aa + ai[m-1] + 1; 2415 vj = aj + ai[m-1] + 1; 2416 nz = 0; 2417 for (i=m-1; i>=0; i--){ 2418 sum = 0.0; 2419 nz2 = ai[i] - ai[i-1] - 1; 2420 PETSC_Prefetch(v-nz2-1,0,1); 2421 PETSC_Prefetch(vj-nz2-1,0,1); 2422 PetscSparseDensePlusDot(sum,x,v,vj,nz); 2423 sum = t[i] - sum; 2424 x[i] = (1-omega)*x[i] + omega*sum*aidiag[i]; 2425 nz = nz2; 2426 v -= nz + 1; 2427 vj -= nz + 1; 2428 } 2429 t = a->relax_work; 2430 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 2431 } 2432 its--; 2433 } 2434 2435 while (its--) { 2436 /* 2437 forward sweep: 2438 for i=0,...,m-1: 2439 sum[i] = (b[i] - U(i,:)x )/d[i]; 2440 x[i] = (1-omega)x[i] + omega*sum[i]; 2441 b = b - x[i]*U^T(i,:); 2442 2443 */ 2444 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2445 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 2446 2447 for (i=0; i<m; i++){ 2448 v = aa + ai[i] + 1; v1=v; 2449 vj = aj + ai[i] + 1; vj1=vj; 2450 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2451 sum = t[i]; 2452 ierr = PetscLogFlops(4.0*nz-2);CHKERRQ(ierr); 2453 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2454 x[i] = (1-omega)*x[i] + omega*sum*aidiag[i]; 2455 while (nz--) t[*vj++] -= x[i]*(*v++); 2456 } 2457 } 2458 2459 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2460 /* 2461 backward sweep: 2462 b = b - x[i]*U^T(i,:), i=0,...,n-2 2463 for i=m-1,...,0: 2464 sum[i] = (b[i] - U(i,:)x )/d[i]; 2465 x[i] = (1-omega)x[i] + omega*sum[i]; 2466 */ 2467 /* if there was a forward sweep done above then I thing the next two for loops are not needed */ 2468 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 2469 2470 for (i=0; i<m-1; i++){ /* update rhs */ 2471 v = aa + ai[i] + 1; 2472 vj = aj + ai[i] + 1; 2473 nz = ai[i+1] - ai[i] - 1; 2474 ierr = PetscLogFlops(2.0*nz-1);CHKERRQ(ierr); 2475 while (nz--) t[*vj++] -= x[i]*(*v++); 2476 } 2477 for (i=m-1; i>=0; i--){ 2478 v = aa + ai[i] + 1; 2479 vj = aj + ai[i] + 1; 2480 nz = ai[i+1] - ai[i] - 1; 2481 ierr = PetscLogFlops(2.0*nz-1);CHKERRQ(ierr); 2482 sum = t[i]; 2483 while (nz--) sum -= x[*vj++]*(*v++); 2484 x[i] = (1-omega)*x[i] + omega*sum*aidiag[i]; 2485 } 2486 } 2487 } 2488 2489 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2490 if (bb != xx) { 2491 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2492 } 2493 PetscFunctionReturn(0); 2494 } 2495 2496 #undef __FUNCT__ 2497 #define __FUNCT__ "MatCreateSeqSBAIJWithArrays" 2498 /*@ 2499 MatCreateSeqSBAIJWithArrays - Creates an sequential SBAIJ matrix using matrix elements 2500 (upper triangular entries in CSR format) provided by the user. 2501 2502 Collective on MPI_Comm 2503 2504 Input Parameters: 2505 + comm - must be an MPI communicator of size 1 2506 . bs - size of block 2507 . m - number of rows 2508 . n - number of columns 2509 . i - row indices 2510 . j - column indices 2511 - a - matrix values 2512 2513 Output Parameter: 2514 . mat - the matrix 2515 2516 Level: intermediate 2517 2518 Notes: 2519 The i, j, and a arrays are not copied by this routine, the user must free these arrays 2520 once the matrix is destroyed 2521 2522 You cannot set new nonzero locations into this matrix, that will generate an error. 2523 2524 The i and j indices are 0 based 2525 2526 .seealso: MatCreate(), MatCreateMPISBAIJ(), MatCreateSeqSBAIJ() 2527 2528 @*/ 2529 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 2530 { 2531 PetscErrorCode ierr; 2532 PetscInt ii; 2533 Mat_SeqSBAIJ *sbaij; 2534 2535 PetscFunctionBegin; 2536 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 2537 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2538 2539 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2540 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 2541 ierr = MatSetType(*mat,MATSEQSBAIJ);CHKERRQ(ierr); 2542 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 2543 sbaij = (Mat_SeqSBAIJ*)(*mat)->data; 2544 ierr = PetscMalloc2(m,PetscInt,&sbaij->imax,m,PetscInt,&sbaij->ilen);CHKERRQ(ierr); 2545 2546 sbaij->i = i; 2547 sbaij->j = j; 2548 sbaij->a = a; 2549 sbaij->singlemalloc = PETSC_FALSE; 2550 sbaij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 2551 sbaij->free_a = PETSC_FALSE; 2552 sbaij->free_ij = PETSC_FALSE; 2553 2554 for (ii=0; ii<m; ii++) { 2555 sbaij->ilen[ii] = sbaij->imax[ii] = i[ii+1] - i[ii]; 2556 #if defined(PETSC_USE_DEBUG) 2557 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 2558 #endif 2559 } 2560 #if defined(PETSC_USE_DEBUG) 2561 for (ii=0; ii<sbaij->i[m]; ii++) { 2562 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 2563 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 2564 } 2565 #endif 2566 2567 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2568 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2569 PetscFunctionReturn(0); 2570 } 2571 2572 2573 2574 2575 2576