1 /*$Id: baij.c,v 1.238 2001/07/16 04:47:34 buschelm Exp buschelm $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "petscsys.h" /*I "petscmat.h" I*/ 8 #include "src/mat/impls/baij/seq/baij.h" 9 #include "src/vec/vecimpl.h" 10 #include "src/inline/spops.h" 11 12 /* UGLY, ugly, ugly 13 When MatScalar == Scalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 14 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 15 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 16 converts the entries into single precision and then calls ..._MatScalar() to put them 17 into the single precision data structures. 18 */ 19 #if defined(PETSC_USE_MAT_SINGLE) 20 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 21 #else 22 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 23 #endif 24 25 #define CHUNKSIZE 10 26 27 /* 28 Checks for missing diagonals 29 */ 30 #undef __FUNCT__ 31 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 32 int MatMissingDiagonal_SeqBAIJ(Mat A) 33 { 34 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 35 int *diag,*jj = a->j,i,ierr; 36 37 PetscFunctionBegin; 38 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 39 diag = a->diag; 40 for (i=0; i<a->mbs; i++) { 41 if (jj[diag[i]] != i) { 42 SETERRQ1(1,"Matrix is missing diagonal number %d",i); 43 } 44 } 45 PetscFunctionReturn(0); 46 } 47 48 #undef __FUNCT__ 49 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 50 int MatMarkDiagonal_SeqBAIJ(Mat A) 51 { 52 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 53 int i,j,*diag,m = a->mbs,ierr; 54 55 PetscFunctionBegin; 56 if (a->diag) PetscFunctionReturn(0); 57 58 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 59 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 60 for (i=0; i<m; i++) { 61 diag[i] = a->i[i+1]; 62 for (j=a->i[i]; j<a->i[i+1]; j++) { 63 if (a->j[j] == i) { 64 diag[i] = j; 65 break; 66 } 67 } 68 } 69 a->diag = diag; 70 PetscFunctionReturn(0); 71 } 72 73 74 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 75 76 #undef __FUNCT__ 77 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 78 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 79 { 80 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 81 int ierr,n = a->mbs,i; 82 83 PetscFunctionBegin; 84 *nn = n; 85 if (!ia) PetscFunctionReturn(0); 86 if (symmetric) { 87 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 88 } else if (oshift == 1) { 89 /* temporarily add 1 to i and j indices */ 90 int nz = a->i[n]; 91 for (i=0; i<nz; i++) a->j[i]++; 92 for (i=0; i<n+1; i++) a->i[i]++; 93 *ia = a->i; *ja = a->j; 94 } else { 95 *ia = a->i; *ja = a->j; 96 } 97 98 PetscFunctionReturn(0); 99 } 100 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 103 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 104 { 105 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 106 int i,n = a->mbs,ierr; 107 108 PetscFunctionBegin; 109 if (!ia) PetscFunctionReturn(0); 110 if (symmetric) { 111 ierr = PetscFree(*ia);CHKERRQ(ierr); 112 ierr = PetscFree(*ja);CHKERRQ(ierr); 113 } else if (oshift == 1) { 114 int nz = a->i[n]-1; 115 for (i=0; i<nz; i++) a->j[i]--; 116 for (i=0; i<n+1; i++) a->i[i]--; 117 } 118 PetscFunctionReturn(0); 119 } 120 121 #undef __FUNCT__ 122 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 123 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 124 { 125 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 126 127 PetscFunctionBegin; 128 *bs = baij->bs; 129 PetscFunctionReturn(0); 130 } 131 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "MatDestroy_SeqBAIJ" 135 int MatDestroy_SeqBAIJ(Mat A) 136 { 137 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 138 int ierr; 139 140 PetscFunctionBegin; 141 #if defined(PETSC_USE_LOG) 142 PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 143 #endif 144 ierr = PetscFree(a->a);CHKERRQ(ierr); 145 if (!a->singlemalloc) { 146 ierr = PetscFree(a->i);CHKERRQ(ierr); 147 ierr = PetscFree(a->j);CHKERRQ(ierr); 148 } 149 if (a->row) { 150 ierr = ISDestroy(a->row);CHKERRQ(ierr); 151 } 152 if (a->col) { 153 ierr = ISDestroy(a->col);CHKERRQ(ierr); 154 } 155 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 156 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 157 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 158 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 159 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 160 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 161 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 162 #if defined(PETSC_USE_MAT_SINGLE) 163 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 164 #endif 165 ierr = PetscFree(a);CHKERRQ(ierr); 166 PetscFunctionReturn(0); 167 } 168 169 #undef __FUNCT__ 170 #define __FUNCT__ "MatSetOption_SeqBAIJ" 171 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 172 { 173 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 174 175 PetscFunctionBegin; 176 switch (op) { 177 case MAT_ROW_ORIENTED: 178 a->roworiented = PETSC_TRUE; 179 break; 180 case MAT_COLUMN_ORIENTED: 181 a->roworiented = PETSC_FALSE; 182 break; 183 case MAT_COLUMNS_SORTED: 184 a->sorted = PETSC_TRUE; 185 break; 186 case MAT_COLUMNS_UNSORTED: 187 a->sorted = PETSC_FALSE; 188 break; 189 case MAT_KEEP_ZEROED_ROWS: 190 a->keepzeroedrows = PETSC_TRUE; 191 break; 192 case MAT_NO_NEW_NONZERO_LOCATIONS: 193 a->nonew = 1; 194 break; 195 case MAT_NEW_NONZERO_LOCATION_ERR: 196 a->nonew = -1; 197 break; 198 case MAT_NEW_NONZERO_ALLOCATION_ERR: 199 a->nonew = -2; 200 break; 201 case MAT_YES_NEW_NONZERO_LOCATIONS: 202 a->nonew = 0; 203 break; 204 case MAT_ROWS_SORTED: 205 case MAT_ROWS_UNSORTED: 206 case MAT_YES_NEW_DIAGONALS: 207 case MAT_IGNORE_OFF_PROC_ENTRIES: 208 case MAT_USE_HASH_TABLE: 209 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 210 break; 211 case MAT_NO_NEW_DIAGONALS: 212 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 213 break; 214 case MAT_USE_SINGLE_PRECISION_SOLVES: 215 if (a->bs==4) { 216 PetscTruth sse_enabled=PETSC_FALSE; 217 int ierr; 218 ierr = PetscSSEIsEnabled(&sse_enabled);CHKERRQ(ierr); 219 #if defined(PETSC_HAVE_SSE) 220 if (sse_enabled) { 221 a->single_precision_solves = PETSC_TRUE; 222 A->ops->solve = MatSolve_SeqBAIJ_Update; 223 A->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 224 break; 225 } 226 #else 227 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 228 #endif 229 } 230 break; 231 default: 232 SETERRQ(PETSC_ERR_SUP,"unknown option"); 233 } 234 PetscFunctionReturn(0); 235 } 236 237 #undef __FUNCT__ 238 #define __FUNCT__ "MatGetOwnershipRange_SeqBAIJ" 239 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 240 { 241 PetscFunctionBegin; 242 if (m) *m = 0; 243 if (n) *n = A->m; 244 PetscFunctionReturn(0); 245 } 246 247 #undef __FUNCT__ 248 #define __FUNCT__ "MatGetRow_SeqBAIJ" 249 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 250 { 251 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 252 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 253 MatScalar *aa,*aa_i; 254 Scalar *v_i; 255 256 PetscFunctionBegin; 257 bs = a->bs; 258 ai = a->i; 259 aj = a->j; 260 aa = a->a; 261 bs2 = a->bs2; 262 263 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 264 265 bn = row/bs; /* Block number */ 266 bp = row % bs; /* Block Position */ 267 M = ai[bn+1] - ai[bn]; 268 *nz = bs*M; 269 270 if (v) { 271 *v = 0; 272 if (*nz) { 273 ierr = PetscMalloc((*nz)*sizeof(Scalar),v);CHKERRQ(ierr); 274 for (i=0; i<M; i++) { /* for each block in the block row */ 275 v_i = *v + i*bs; 276 aa_i = aa + bs2*(ai[bn] + i); 277 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 278 } 279 } 280 } 281 282 if (idx) { 283 *idx = 0; 284 if (*nz) { 285 ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 286 for (i=0; i<M; i++) { /* for each block in the block row */ 287 idx_i = *idx + i*bs; 288 itmp = bs*aj[ai[bn] + i]; 289 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 290 } 291 } 292 } 293 PetscFunctionReturn(0); 294 } 295 296 #undef __FUNCT__ 297 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 298 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 299 { 300 int ierr; 301 302 PetscFunctionBegin; 303 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 304 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 305 PetscFunctionReturn(0); 306 } 307 308 #undef __FUNCT__ 309 #define __FUNCT__ "MatTranspose_SeqBAIJ" 310 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 311 { 312 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 313 Mat C; 314 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 315 int *rows,*cols,bs2=a->bs2; 316 Scalar *array; 317 318 PetscFunctionBegin; 319 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 320 ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 321 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 322 323 #if defined(PETSC_USE_MAT_SINGLE) 324 ierr = PetscMalloc(a->bs2*a->nz*sizeof(Scalar),&array);CHKERRQ(ierr); 325 for (i=0; i<a->bs2*a->nz; i++) array[i] = (Scalar)a->a[i]; 326 #else 327 array = a->a; 328 #endif 329 330 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 331 ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 332 ierr = PetscFree(col);CHKERRQ(ierr); 333 ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 334 cols = rows + bs; 335 for (i=0; i<mbs; i++) { 336 cols[0] = i*bs; 337 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 338 len = ai[i+1] - ai[i]; 339 for (j=0; j<len; j++) { 340 rows[0] = (*aj++)*bs; 341 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 342 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 343 array += bs2; 344 } 345 } 346 ierr = PetscFree(rows);CHKERRQ(ierr); 347 #if defined(PETSC_USE_MAT_SINGLE) 348 ierr = PetscFree(array); 349 #endif 350 351 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 352 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 353 354 if (B) { 355 *B = C; 356 } else { 357 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 358 } 359 PetscFunctionReturn(0); 360 } 361 362 #undef __FUNCT__ 363 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 364 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 365 { 366 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 367 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 368 Scalar *aa; 369 FILE *file; 370 371 PetscFunctionBegin; 372 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 373 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 374 col_lens[0] = MAT_COOKIE; 375 376 col_lens[1] = A->m; 377 col_lens[2] = A->n; 378 col_lens[3] = a->nz*bs2; 379 380 /* store lengths of each row and write (including header) to file */ 381 count = 0; 382 for (i=0; i<a->mbs; i++) { 383 for (j=0; j<bs; j++) { 384 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 385 } 386 } 387 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 388 ierr = PetscFree(col_lens);CHKERRQ(ierr); 389 390 /* store column indices (zero start index) */ 391 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 392 count = 0; 393 for (i=0; i<a->mbs; i++) { 394 for (j=0; j<bs; j++) { 395 for (k=a->i[i]; k<a->i[i+1]; k++) { 396 for (l=0; l<bs; l++) { 397 jj[count++] = bs*a->j[k] + l; 398 } 399 } 400 } 401 } 402 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 403 ierr = PetscFree(jj);CHKERRQ(ierr); 404 405 /* store nonzero values */ 406 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(Scalar),&aa);CHKERRQ(ierr); 407 count = 0; 408 for (i=0; i<a->mbs; i++) { 409 for (j=0; j<bs; j++) { 410 for (k=a->i[i]; k<a->i[i+1]; k++) { 411 for (l=0; l<bs; l++) { 412 aa[count++] = a->a[bs2*k + l*bs + j]; 413 } 414 } 415 } 416 } 417 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 418 ierr = PetscFree(aa);CHKERRQ(ierr); 419 420 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 421 if (file) { 422 fprintf(file,"-matload_block_size %d\n",a->bs); 423 } 424 PetscFunctionReturn(0); 425 } 426 427 #undef __FUNCT__ 428 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 429 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 430 { 431 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 432 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 433 PetscViewerFormat format; 434 435 PetscFunctionBegin; 436 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 437 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) { 438 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 439 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 440 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 441 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 442 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 443 for (i=0; i<a->mbs; i++) { 444 for (j=0; j<bs; j++) { 445 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 446 for (k=a->i[i]; k<a->i[i+1]; k++) { 447 for (l=0; l<bs; l++) { 448 #if defined(PETSC_USE_COMPLEX) 449 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 450 ierr = PetscViewerASCIIPrintf(viewer," %d %g + %gi",bs*a->j[k]+l, 451 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 452 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 453 ierr = PetscViewerASCIIPrintf(viewer," %d %g - %gi",bs*a->j[k]+l, 454 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 455 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 456 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 457 } 458 #else 459 if (a->a[bs2*k + l*bs + j] != 0.0) { 460 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 461 } 462 #endif 463 } 464 } 465 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 466 } 467 } 468 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 469 } else { 470 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 471 for (i=0; i<a->mbs; i++) { 472 for (j=0; j<bs; j++) { 473 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 474 for (k=a->i[i]; k<a->i[i+1]; k++) { 475 for (l=0; l<bs; l++) { 476 #if defined(PETSC_USE_COMPLEX) 477 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 478 ierr = PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l, 479 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 480 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 481 ierr = PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l, 482 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 483 } else { 484 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 485 } 486 #else 487 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 488 #endif 489 } 490 } 491 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 492 } 493 } 494 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 495 } 496 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 497 PetscFunctionReturn(0); 498 } 499 500 #undef __FUNCT__ 501 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 502 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 503 { 504 Mat A = (Mat) Aa; 505 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 506 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 507 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 508 MatScalar *aa; 509 PetscViewer viewer; 510 511 PetscFunctionBegin; 512 513 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 514 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 515 516 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 517 518 /* loop over matrix elements drawing boxes */ 519 color = PETSC_DRAW_BLUE; 520 for (i=0,row=0; i<mbs; i++,row+=bs) { 521 for (j=a->i[i]; j<a->i[i+1]; j++) { 522 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 523 x_l = a->j[j]*bs; x_r = x_l + 1.0; 524 aa = a->a + j*bs2; 525 for (k=0; k<bs; k++) { 526 for (l=0; l<bs; l++) { 527 if (PetscRealPart(*aa++) >= 0.) continue; 528 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 529 } 530 } 531 } 532 } 533 color = PETSC_DRAW_CYAN; 534 for (i=0,row=0; i<mbs; i++,row+=bs) { 535 for (j=a->i[i]; j<a->i[i+1]; j++) { 536 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 537 x_l = a->j[j]*bs; x_r = x_l + 1.0; 538 aa = a->a + j*bs2; 539 for (k=0; k<bs; k++) { 540 for (l=0; l<bs; l++) { 541 if (PetscRealPart(*aa++) != 0.) continue; 542 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 543 } 544 } 545 } 546 } 547 548 color = PETSC_DRAW_RED; 549 for (i=0,row=0; i<mbs; i++,row+=bs) { 550 for (j=a->i[i]; j<a->i[i+1]; j++) { 551 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 552 x_l = a->j[j]*bs; x_r = x_l + 1.0; 553 aa = a->a + j*bs2; 554 for (k=0; k<bs; k++) { 555 for (l=0; l<bs; l++) { 556 if (PetscRealPart(*aa++) <= 0.) continue; 557 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 558 } 559 } 560 } 561 } 562 PetscFunctionReturn(0); 563 } 564 565 #undef __FUNCT__ 566 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 567 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 568 { 569 int ierr; 570 PetscReal xl,yl,xr,yr,w,h; 571 PetscDraw draw; 572 PetscTruth isnull; 573 574 PetscFunctionBegin; 575 576 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 577 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 578 579 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 580 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 581 xr += w; yr += h; xl = -w; yl = -h; 582 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 583 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 584 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 585 PetscFunctionReturn(0); 586 } 587 588 #undef __FUNCT__ 589 #define __FUNCT__ "MatView_SeqBAIJ" 590 int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 591 { 592 int ierr; 593 PetscTruth issocket,isascii,isbinary,isdraw; 594 595 PetscFunctionBegin; 596 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 597 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 598 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 599 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 600 if (issocket) { 601 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 602 } else if (isascii){ 603 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 604 } else if (isbinary) { 605 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 606 } else if (isdraw) { 607 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 608 } else { 609 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 610 } 611 PetscFunctionReturn(0); 612 } 613 614 615 #undef __FUNCT__ 616 #define __FUNCT__ "MatGetValues_SeqBAIJ" 617 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 618 { 619 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 620 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 621 int *ai = a->i,*ailen = a->ilen; 622 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 623 MatScalar *ap,*aa = a->a,zero = 0.0; 624 625 PetscFunctionBegin; 626 for (k=0; k<m; k++) { /* loop over rows */ 627 row = im[k]; brow = row/bs; 628 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 629 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 630 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 631 nrow = ailen[brow]; 632 for (l=0; l<n; l++) { /* loop over columns */ 633 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 634 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 635 col = in[l] ; 636 bcol = col/bs; 637 cidx = col%bs; 638 ridx = row%bs; 639 high = nrow; 640 low = 0; /* assume unsorted */ 641 while (high-low > 5) { 642 t = (low+high)/2; 643 if (rp[t] > bcol) high = t; 644 else low = t; 645 } 646 for (i=low; i<high; i++) { 647 if (rp[i] > bcol) break; 648 if (rp[i] == bcol) { 649 *v++ = ap[bs2*i+bs*cidx+ridx]; 650 goto finished; 651 } 652 } 653 *v++ = zero; 654 finished:; 655 } 656 } 657 PetscFunctionReturn(0); 658 } 659 660 #if defined(PETSC_USE_MAT_SINGLE) 661 #undef __FUNCT__ 662 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 663 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 664 { 665 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 666 int ierr,i,N = m*n*b->bs2; 667 MatScalar *vsingle; 668 669 PetscFunctionBegin; 670 if (N > b->setvalueslen) { 671 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 672 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 673 b->setvalueslen = N; 674 } 675 vsingle = b->setvaluescopy; 676 for (i=0; i<N; i++) { 677 vsingle[i] = v[i]; 678 } 679 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 680 PetscFunctionReturn(0); 681 } 682 #endif 683 684 685 #undef __FUNCT__ 686 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 687 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is) 688 { 689 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 690 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 691 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 692 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 693 PetscTruth roworiented=a->roworiented; 694 MatScalar *value = v,*ap,*aa = a->a,*bap; 695 696 PetscFunctionBegin; 697 if (roworiented) { 698 stepval = (n-1)*bs; 699 } else { 700 stepval = (m-1)*bs; 701 } 702 for (k=0; k<m; k++) { /* loop over added rows */ 703 row = im[k]; 704 if (row < 0) continue; 705 #if defined(PETSC_USE_BOPT_g) 706 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 707 #endif 708 rp = aj + ai[row]; 709 ap = aa + bs2*ai[row]; 710 rmax = imax[row]; 711 nrow = ailen[row]; 712 low = 0; 713 for (l=0; l<n; l++) { /* loop over added columns */ 714 if (in[l] < 0) continue; 715 #if defined(PETSC_USE_BOPT_g) 716 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 717 #endif 718 col = in[l]; 719 if (roworiented) { 720 value = v + k*(stepval+bs)*bs + l*bs; 721 } else { 722 value = v + l*(stepval+bs)*bs + k*bs; 723 } 724 if (!sorted) low = 0; high = nrow; 725 while (high-low > 7) { 726 t = (low+high)/2; 727 if (rp[t] > col) high = t; 728 else low = t; 729 } 730 for (i=low; i<high; i++) { 731 if (rp[i] > col) break; 732 if (rp[i] == col) { 733 bap = ap + bs2*i; 734 if (roworiented) { 735 if (is == ADD_VALUES) { 736 for (ii=0; ii<bs; ii++,value+=stepval) { 737 for (jj=ii; jj<bs2; jj+=bs) { 738 bap[jj] += *value++; 739 } 740 } 741 } else { 742 for (ii=0; ii<bs; ii++,value+=stepval) { 743 for (jj=ii; jj<bs2; jj+=bs) { 744 bap[jj] = *value++; 745 } 746 } 747 } 748 } else { 749 if (is == ADD_VALUES) { 750 for (ii=0; ii<bs; ii++,value+=stepval) { 751 for (jj=0; jj<bs; jj++) { 752 *bap++ += *value++; 753 } 754 } 755 } else { 756 for (ii=0; ii<bs; ii++,value+=stepval) { 757 for (jj=0; jj<bs; jj++) { 758 *bap++ = *value++; 759 } 760 } 761 } 762 } 763 goto noinsert2; 764 } 765 } 766 if (nonew == 1) goto noinsert2; 767 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 768 if (nrow >= rmax) { 769 /* there is no extra room in row, therefore enlarge */ 770 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 771 MatScalar *new_a; 772 773 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 774 775 /* malloc new storage space */ 776 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 777 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 778 new_j = (int*)(new_a + bs2*new_nz); 779 new_i = new_j + new_nz; 780 781 /* copy over old data into new slots */ 782 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 783 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 784 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 785 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 786 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 787 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 788 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 789 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 790 /* free up old matrix storage */ 791 ierr = PetscFree(a->a);CHKERRQ(ierr); 792 if (!a->singlemalloc) { 793 ierr = PetscFree(a->i);CHKERRQ(ierr); 794 ierr = PetscFree(a->j);CHKERRQ(ierr); 795 } 796 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 797 a->singlemalloc = PETSC_TRUE; 798 799 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 800 rmax = imax[row] = imax[row] + CHUNKSIZE; 801 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 802 a->maxnz += bs2*CHUNKSIZE; 803 a->reallocs++; 804 a->nz++; 805 } 806 N = nrow++ - 1; 807 /* shift up all the later entries in this row */ 808 for (ii=N; ii>=i; ii--) { 809 rp[ii+1] = rp[ii]; 810 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 811 } 812 if (N >= i) { 813 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 814 } 815 rp[i] = col; 816 bap = ap + bs2*i; 817 if (roworiented) { 818 for (ii=0; ii<bs; ii++,value+=stepval) { 819 for (jj=ii; jj<bs2; jj+=bs) { 820 bap[jj] = *value++; 821 } 822 } 823 } else { 824 for (ii=0; ii<bs; ii++,value+=stepval) { 825 for (jj=0; jj<bs; jj++) { 826 *bap++ = *value++; 827 } 828 } 829 } 830 noinsert2:; 831 low = i; 832 } 833 ailen[row] = nrow; 834 } 835 PetscFunctionReturn(0); 836 } 837 838 839 #undef __FUNCT__ 840 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 841 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 842 { 843 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 844 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 845 int m = A->m,*ip,N,*ailen = a->ilen; 846 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 847 MatScalar *aa = a->a,*ap; 848 849 PetscFunctionBegin; 850 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 851 852 if (m) rmax = ailen[0]; 853 for (i=1; i<mbs; i++) { 854 /* move each row back by the amount of empty slots (fshift) before it*/ 855 fshift += imax[i-1] - ailen[i-1]; 856 rmax = PetscMax(rmax,ailen[i]); 857 if (fshift) { 858 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 859 N = ailen[i]; 860 for (j=0; j<N; j++) { 861 ip[j-fshift] = ip[j]; 862 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 863 } 864 } 865 ai[i] = ai[i-1] + ailen[i-1]; 866 } 867 if (mbs) { 868 fshift += imax[mbs-1] - ailen[mbs-1]; 869 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 870 } 871 /* reset ilen and imax for each row */ 872 for (i=0; i<mbs; i++) { 873 ailen[i] = imax[i] = ai[i+1] - ai[i]; 874 } 875 a->nz = ai[mbs]; 876 877 /* diagonals may have moved, so kill the diagonal pointers */ 878 if (fshift && a->diag) { 879 ierr = PetscFree(a->diag);CHKERRQ(ierr); 880 PetscLogObjectMemory(A,-(m+1)*sizeof(int)); 881 a->diag = 0; 882 } 883 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 884 m,A->n,a->bs,fshift*bs2,a->nz*bs2); 885 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 886 a->reallocs); 887 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 888 a->reallocs = 0; 889 A->info.nz_unneeded = (PetscReal)fshift*bs2; 890 891 PetscFunctionReturn(0); 892 } 893 894 895 896 /* 897 This function returns an array of flags which indicate the locations of contiguous 898 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 899 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 900 Assume: sizes should be long enough to hold all the values. 901 */ 902 #undef __FUNCT__ 903 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 904 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 905 { 906 int i,j,k,row; 907 PetscTruth flg; 908 909 PetscFunctionBegin; 910 for (i=0,j=0; i<n; j++) { 911 row = idx[i]; 912 if (row%bs!=0) { /* Not the begining of a block */ 913 sizes[j] = 1; 914 i++; 915 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 916 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 917 i++; 918 } else { /* Begining of the block, so check if the complete block exists */ 919 flg = PETSC_TRUE; 920 for (k=1; k<bs; k++) { 921 if (row+k != idx[i+k]) { /* break in the block */ 922 flg = PETSC_FALSE; 923 break; 924 } 925 } 926 if (flg == PETSC_TRUE) { /* No break in the bs */ 927 sizes[j] = bs; 928 i+= bs; 929 } else { 930 sizes[j] = 1; 931 i++; 932 } 933 } 934 } 935 *bs_max = j; 936 PetscFunctionReturn(0); 937 } 938 939 #undef __FUNCT__ 940 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 941 int MatZeroRows_SeqBAIJ(Mat A,IS is,Scalar *diag) 942 { 943 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 944 int ierr,i,j,k,count,is_n,*is_idx,*rows; 945 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 946 Scalar zero = 0.0; 947 MatScalar *aa; 948 949 PetscFunctionBegin; 950 /* Make a copy of the IS and sort it */ 951 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 952 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 953 954 /* allocate memory for rows,sizes */ 955 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 956 sizes = rows + is_n; 957 958 /* copy IS values to rows, and sort them */ 959 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 960 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 961 if (baij->keepzeroedrows) { 962 for (i=0; i<is_n; i++) { sizes[i] = 1; } 963 bs_max = is_n; 964 } else { 965 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 966 } 967 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 968 969 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 970 row = rows[j]; 971 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 972 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 973 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 974 if (sizes[i] == bs && !baij->keepzeroedrows) { 975 if (diag) { 976 if (baij->ilen[row/bs] > 0) { 977 baij->ilen[row/bs] = 1; 978 baij->j[baij->i[row/bs]] = row/bs; 979 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 980 } 981 /* Now insert all the diagonal values for this bs */ 982 for (k=0; k<bs; k++) { 983 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 984 } 985 } else { /* (!diag) */ 986 baij->ilen[row/bs] = 0; 987 } /* end (!diag) */ 988 } else { /* (sizes[i] != bs) */ 989 #if defined (PETSC_USE_DEBUG) 990 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 991 #endif 992 for (k=0; k<count; k++) { 993 aa[0] = zero; 994 aa += bs; 995 } 996 if (diag) { 997 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 998 } 999 } 1000 } 1001 1002 ierr = PetscFree(rows);CHKERRQ(ierr); 1003 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1004 PetscFunctionReturn(0); 1005 } 1006 1007 #undef __FUNCT__ 1008 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1009 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 1010 { 1011 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1012 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1013 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1014 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1015 int ridx,cidx,bs2=a->bs2,ierr; 1016 PetscTruth roworiented=a->roworiented; 1017 MatScalar *ap,value,*aa=a->a,*bap; 1018 1019 PetscFunctionBegin; 1020 for (k=0; k<m; k++) { /* loop over added rows */ 1021 row = im[k]; brow = row/bs; 1022 if (row < 0) continue; 1023 #if defined(PETSC_USE_BOPT_g) 1024 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 1025 #endif 1026 rp = aj + ai[brow]; 1027 ap = aa + bs2*ai[brow]; 1028 rmax = imax[brow]; 1029 nrow = ailen[brow]; 1030 low = 0; 1031 for (l=0; l<n; l++) { /* loop over added columns */ 1032 if (in[l] < 0) continue; 1033 #if defined(PETSC_USE_BOPT_g) 1034 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n); 1035 #endif 1036 col = in[l]; bcol = col/bs; 1037 ridx = row % bs; cidx = col % bs; 1038 if (roworiented) { 1039 value = v[l + k*n]; 1040 } else { 1041 value = v[k + l*m]; 1042 } 1043 if (!sorted) low = 0; high = nrow; 1044 while (high-low > 7) { 1045 t = (low+high)/2; 1046 if (rp[t] > bcol) high = t; 1047 else low = t; 1048 } 1049 for (i=low; i<high; i++) { 1050 if (rp[i] > bcol) break; 1051 if (rp[i] == bcol) { 1052 bap = ap + bs2*i + bs*cidx + ridx; 1053 if (is == ADD_VALUES) *bap += value; 1054 else *bap = value; 1055 goto noinsert1; 1056 } 1057 } 1058 if (nonew == 1) goto noinsert1; 1059 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1060 if (nrow >= rmax) { 1061 /* there is no extra room in row, therefore enlarge */ 1062 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1063 MatScalar *new_a; 1064 1065 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1066 1067 /* Malloc new storage space */ 1068 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1069 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1070 new_j = (int*)(new_a + bs2*new_nz); 1071 new_i = new_j + new_nz; 1072 1073 /* copy over old data into new slots */ 1074 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1075 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1076 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1077 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1078 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1079 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1080 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1081 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1082 /* free up old matrix storage */ 1083 ierr = PetscFree(a->a);CHKERRQ(ierr); 1084 if (!a->singlemalloc) { 1085 ierr = PetscFree(a->i);CHKERRQ(ierr); 1086 ierr = PetscFree(a->j);CHKERRQ(ierr); 1087 } 1088 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1089 a->singlemalloc = PETSC_TRUE; 1090 1091 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1092 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1093 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1094 a->maxnz += bs2*CHUNKSIZE; 1095 a->reallocs++; 1096 a->nz++; 1097 } 1098 N = nrow++ - 1; 1099 /* shift up all the later entries in this row */ 1100 for (ii=N; ii>=i; ii--) { 1101 rp[ii+1] = rp[ii]; 1102 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1103 } 1104 if (N>=i) { 1105 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1106 } 1107 rp[i] = bcol; 1108 ap[bs2*i + bs*cidx + ridx] = value; 1109 noinsert1:; 1110 low = i; 1111 } 1112 ailen[brow] = nrow; 1113 } 1114 PetscFunctionReturn(0); 1115 } 1116 1117 1118 #undef __FUNCT__ 1119 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1120 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 1121 { 1122 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1123 Mat outA; 1124 int ierr; 1125 PetscTruth row_identity,col_identity; 1126 1127 PetscFunctionBegin; 1128 if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1129 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1130 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1131 if (!row_identity || !col_identity) { 1132 SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1133 } 1134 1135 outA = inA; 1136 inA->factor = FACTOR_LU; 1137 1138 if (!a->diag) { 1139 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1140 } 1141 /* 1142 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1143 for ILU(0) factorization with natural ordering 1144 */ 1145 if (a->bs < 8) { 1146 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1147 } else { 1148 a->row = row; 1149 a->col = col; 1150 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1151 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1152 1153 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1154 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1155 PetscLogObjectParent(inA,a->icol); 1156 1157 if (!a->solve_work) { 1158 ierr = PetscMalloc((inA->m+a->bs)*sizeof(Scalar),&a->solve_work);CHKERRQ(ierr); 1159 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(Scalar)); 1160 } 1161 } 1162 1163 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1164 1165 PetscFunctionReturn(0); 1166 } 1167 #undef __FUNCT__ 1168 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1169 int MatPrintHelp_SeqBAIJ(Mat A) 1170 { 1171 static PetscTruth called = PETSC_FALSE; 1172 MPI_Comm comm = A->comm; 1173 int ierr; 1174 1175 PetscFunctionBegin; 1176 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1177 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1178 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1179 PetscFunctionReturn(0); 1180 } 1181 1182 EXTERN_C_BEGIN 1183 #undef __FUNCT__ 1184 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1185 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1186 { 1187 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1188 int i,nz,n; 1189 1190 PetscFunctionBegin; 1191 nz = baij->maxnz; 1192 n = mat->n; 1193 for (i=0; i<nz; i++) { 1194 baij->j[i] = indices[i]; 1195 } 1196 baij->nz = nz; 1197 for (i=0; i<n; i++) { 1198 baij->ilen[i] = baij->imax[i]; 1199 } 1200 1201 PetscFunctionReturn(0); 1202 } 1203 EXTERN_C_END 1204 1205 #undef __FUNCT__ 1206 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1207 /*@ 1208 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1209 in the matrix. 1210 1211 Input Parameters: 1212 + mat - the SeqBAIJ matrix 1213 - indices - the column indices 1214 1215 Level: advanced 1216 1217 Notes: 1218 This can be called if you have precomputed the nonzero structure of the 1219 matrix and want to provide it to the matrix object to improve the performance 1220 of the MatSetValues() operation. 1221 1222 You MUST have set the correct numbers of nonzeros per row in the call to 1223 MatCreateSeqBAIJ(). 1224 1225 MUST be called before any calls to MatSetValues(); 1226 1227 @*/ 1228 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1229 { 1230 int ierr,(*f)(Mat,int *); 1231 1232 PetscFunctionBegin; 1233 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1234 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)())&f);CHKERRQ(ierr); 1235 if (f) { 1236 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1237 } else { 1238 SETERRQ(1,"Wrong type of matrix to set column indices"); 1239 } 1240 PetscFunctionReturn(0); 1241 } 1242 1243 #undef __FUNCT__ 1244 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1245 int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1246 { 1247 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1248 int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1249 PetscReal atmp; 1250 Scalar *x,zero = 0.0; 1251 MatScalar *aa; 1252 int ncols,brow,krow,kcol; 1253 1254 PetscFunctionBegin; 1255 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1256 bs = a->bs; 1257 aa = a->a; 1258 ai = a->i; 1259 aj = a->j; 1260 mbs = a->mbs; 1261 1262 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1263 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1264 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1265 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1266 for (i=0; i<mbs; i++) { 1267 ncols = ai[1] - ai[0]; ai++; 1268 brow = bs*i; 1269 for (j=0; j<ncols; j++){ 1270 /* bcol = bs*(*aj); */ 1271 for (kcol=0; kcol<bs; kcol++){ 1272 for (krow=0; krow<bs; krow++){ 1273 atmp = PetscAbsScalar(*aa); aa++; 1274 row = brow + krow; /* row index */ 1275 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1276 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1277 } 1278 } 1279 aj++; 1280 } 1281 } 1282 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1283 PetscFunctionReturn(0); 1284 } 1285 1286 #undef __FUNCT__ 1287 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1288 int MatSetUpPreallocation_SeqBAIJ(Mat A) 1289 { 1290 int ierr; 1291 1292 PetscFunctionBegin; 1293 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1294 PetscFunctionReturn(0); 1295 } 1296 1297 #undef __FUNCT__ 1298 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1299 int MatGetArray_SeqBAIJ(Mat A,Scalar **array) 1300 { 1301 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1302 PetscFunctionBegin; 1303 *array = a->a; 1304 PetscFunctionReturn(0); 1305 } 1306 1307 #undef __FUNCT__ 1308 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1309 int MatRestoreArray_SeqBAIJ(Mat A,Scalar **array) 1310 { 1311 PetscFunctionBegin; 1312 PetscFunctionReturn(0); 1313 } 1314 1315 /* -------------------------------------------------------------------*/ 1316 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1317 MatGetRow_SeqBAIJ, 1318 MatRestoreRow_SeqBAIJ, 1319 MatMult_SeqBAIJ_N, 1320 MatMultAdd_SeqBAIJ_N, 1321 MatMultTranspose_SeqBAIJ, 1322 MatMultTransposeAdd_SeqBAIJ, 1323 MatSolve_SeqBAIJ_N, 1324 0, 1325 0, 1326 0, 1327 MatLUFactor_SeqBAIJ, 1328 0, 1329 0, 1330 MatTranspose_SeqBAIJ, 1331 MatGetInfo_SeqBAIJ, 1332 MatEqual_SeqBAIJ, 1333 MatGetDiagonal_SeqBAIJ, 1334 MatDiagonalScale_SeqBAIJ, 1335 MatNorm_SeqBAIJ, 1336 0, 1337 MatAssemblyEnd_SeqBAIJ, 1338 0, 1339 MatSetOption_SeqBAIJ, 1340 MatZeroEntries_SeqBAIJ, 1341 MatZeroRows_SeqBAIJ, 1342 MatLUFactorSymbolic_SeqBAIJ, 1343 MatLUFactorNumeric_SeqBAIJ_N, 1344 0, 1345 0, 1346 MatSetUpPreallocation_SeqBAIJ, 1347 0, 1348 MatGetOwnershipRange_SeqBAIJ, 1349 MatILUFactorSymbolic_SeqBAIJ, 1350 0, 1351 MatGetArray_SeqBAIJ, 1352 MatRestoreArray_SeqBAIJ, 1353 MatDuplicate_SeqBAIJ, 1354 0, 1355 0, 1356 MatILUFactor_SeqBAIJ, 1357 0, 1358 0, 1359 MatGetSubMatrices_SeqBAIJ, 1360 MatIncreaseOverlap_SeqBAIJ, 1361 MatGetValues_SeqBAIJ, 1362 0, 1363 MatPrintHelp_SeqBAIJ, 1364 MatScale_SeqBAIJ, 1365 0, 1366 0, 1367 0, 1368 MatGetBlockSize_SeqBAIJ, 1369 MatGetRowIJ_SeqBAIJ, 1370 MatRestoreRowIJ_SeqBAIJ, 1371 0, 1372 0, 1373 0, 1374 0, 1375 0, 1376 0, 1377 MatSetValuesBlocked_SeqBAIJ, 1378 MatGetSubMatrix_SeqBAIJ, 1379 MatDestroy_SeqBAIJ, 1380 MatView_SeqBAIJ, 1381 MatGetMaps_Petsc, 1382 0, 1383 0, 1384 0, 1385 0, 1386 0, 1387 0, 1388 MatGetRowMax_SeqBAIJ, 1389 MatConvert_Basic}; 1390 1391 EXTERN_C_BEGIN 1392 #undef __FUNCT__ 1393 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 1394 int MatStoreValues_SeqBAIJ(Mat mat) 1395 { 1396 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1397 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1398 int ierr; 1399 1400 PetscFunctionBegin; 1401 if (aij->nonew != 1) { 1402 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1403 } 1404 1405 /* allocate space for values if not already there */ 1406 if (!aij->saved_values) { 1407 ierr = PetscMalloc((nz+1)*sizeof(Scalar),&aij->saved_values);CHKERRQ(ierr); 1408 } 1409 1410 /* copy values over */ 1411 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr); 1412 PetscFunctionReturn(0); 1413 } 1414 EXTERN_C_END 1415 1416 EXTERN_C_BEGIN 1417 #undef __FUNCT__ 1418 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 1419 int MatRetrieveValues_SeqBAIJ(Mat mat) 1420 { 1421 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1422 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1423 1424 PetscFunctionBegin; 1425 if (aij->nonew != 1) { 1426 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1427 } 1428 if (!aij->saved_values) { 1429 SETERRQ(1,"Must call MatStoreValues(A);first"); 1430 } 1431 1432 /* copy values over */ 1433 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr); 1434 PetscFunctionReturn(0); 1435 } 1436 EXTERN_C_END 1437 1438 EXTERN_C_BEGIN 1439 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*); 1440 EXTERN_C_END 1441 1442 EXTERN_C_BEGIN 1443 #undef __FUNCT__ 1444 #define __FUNCT__ "MatCreate_SeqBAIJ" 1445 int MatCreate_SeqBAIJ(Mat B) 1446 { 1447 int ierr,size; 1448 Mat_SeqBAIJ *b; 1449 1450 PetscFunctionBegin; 1451 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1452 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1453 1454 B->m = B->M = PetscMax(B->m,B->M); 1455 B->n = B->N = PetscMax(B->n,B->N); 1456 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 1457 B->data = (void*)b; 1458 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1459 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1460 B->factor = 0; 1461 B->lupivotthreshold = 1.0; 1462 B->mapping = 0; 1463 b->row = 0; 1464 b->col = 0; 1465 b->icol = 0; 1466 b->reallocs = 0; 1467 b->saved_values = 0; 1468 #if defined(PETSC_USE_MAT_SINGLE) 1469 b->setvalueslen = 0; 1470 b->setvaluescopy = PETSC_NULL; 1471 #endif 1472 b->single_precision_solves = PETSC_FALSE; 1473 1474 ierr = MapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 1475 ierr = MapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 1476 1477 b->sorted = PETSC_FALSE; 1478 b->roworiented = PETSC_TRUE; 1479 b->nonew = 0; 1480 b->diag = 0; 1481 b->solve_work = 0; 1482 b->mult_work = 0; 1483 b->spptr = 0; 1484 B->info.nz_unneeded = (PetscReal)b->maxnz; 1485 b->keepzeroedrows = PETSC_FALSE; 1486 1487 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1488 "MatStoreValues_SeqBAIJ", 1489 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1490 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1491 "MatRetrieveValues_SeqBAIJ", 1492 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1493 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1494 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1495 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1496 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C", 1497 "MatConvert_SeqBAIJ_SeqAIJ", 1498 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 1499 PetscFunctionReturn(0); 1500 } 1501 EXTERN_C_END 1502 1503 #undef __FUNCT__ 1504 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 1505 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1506 { 1507 Mat C; 1508 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 1509 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1510 1511 PetscFunctionBegin; 1512 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1513 1514 *B = 0; 1515 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1516 ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr); 1517 c = (Mat_SeqBAIJ*)C->data; 1518 1519 c->bs = a->bs; 1520 c->bs2 = a->bs2; 1521 c->mbs = a->mbs; 1522 c->nbs = a->nbs; 1523 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1524 1525 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1526 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1527 for (i=0; i<mbs; i++) { 1528 c->imax[i] = a->imax[i]; 1529 c->ilen[i] = a->ilen[i]; 1530 } 1531 1532 /* allocate the matrix space */ 1533 c->singlemalloc = PETSC_TRUE; 1534 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1535 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1536 c->j = (int*)(c->a + nz*bs2); 1537 c->i = c->j + nz; 1538 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1539 if (mbs > 0) { 1540 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1541 if (cpvalues == MAT_COPY_VALUES) { 1542 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1543 } else { 1544 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1545 } 1546 } 1547 1548 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1549 c->sorted = a->sorted; 1550 c->roworiented = a->roworiented; 1551 c->nonew = a->nonew; 1552 1553 if (a->diag) { 1554 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1555 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1556 for (i=0; i<mbs; i++) { 1557 c->diag[i] = a->diag[i]; 1558 } 1559 } else c->diag = 0; 1560 c->nz = a->nz; 1561 c->maxnz = a->maxnz; 1562 c->solve_work = 0; 1563 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1564 c->mult_work = 0; 1565 C->preallocated = PETSC_TRUE; 1566 C->assembled = PETSC_TRUE; 1567 *B = C; 1568 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1569 PetscFunctionReturn(0); 1570 } 1571 1572 EXTERN_C_BEGIN 1573 #undef __FUNCT__ 1574 #define __FUNCT__ "MatLoad_SeqBAIJ" 1575 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A) 1576 { 1577 Mat_SeqBAIJ *a; 1578 Mat B; 1579 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1580 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1581 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1582 int *masked,nmask,tmp,bs2,ishift; 1583 Scalar *aa; 1584 MPI_Comm comm = ((PetscObject)viewer)->comm; 1585 1586 PetscFunctionBegin; 1587 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1588 bs2 = bs*bs; 1589 1590 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1591 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1592 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1593 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1594 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1595 M = header[1]; N = header[2]; nz = header[3]; 1596 1597 if (header[3] < 0) { 1598 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 1599 } 1600 1601 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1602 1603 /* 1604 This code adds extra rows to make sure the number of rows is 1605 divisible by the blocksize 1606 */ 1607 mbs = M/bs; 1608 extra_rows = bs - M + bs*(mbs); 1609 if (extra_rows == bs) extra_rows = 0; 1610 else mbs++; 1611 if (extra_rows) { 1612 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1613 } 1614 1615 /* read in row lengths */ 1616 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1617 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1618 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1619 1620 /* read in column indices */ 1621 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1622 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1623 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1624 1625 /* loop over row lengths determining block row lengths */ 1626 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 1627 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1628 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1629 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1630 masked = mask + mbs; 1631 rowcount = 0; nzcount = 0; 1632 for (i=0; i<mbs; i++) { 1633 nmask = 0; 1634 for (j=0; j<bs; j++) { 1635 kmax = rowlengths[rowcount]; 1636 for (k=0; k<kmax; k++) { 1637 tmp = jj[nzcount++]/bs; 1638 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1639 } 1640 rowcount++; 1641 } 1642 browlengths[i] += nmask; 1643 /* zero out the mask elements we set */ 1644 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1645 } 1646 1647 /* create our matrix */ 1648 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1649 B = *A; 1650 a = (Mat_SeqBAIJ*)B->data; 1651 1652 /* set matrix "i" values */ 1653 a->i[0] = 0; 1654 for (i=1; i<= mbs; i++) { 1655 a->i[i] = a->i[i-1] + browlengths[i-1]; 1656 a->ilen[i-1] = browlengths[i-1]; 1657 } 1658 a->nz = 0; 1659 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 1660 1661 /* read in nonzero values */ 1662 ierr = PetscMalloc((nz+extra_rows)*sizeof(Scalar),&aa);CHKERRQ(ierr); 1663 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1664 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1665 1666 /* set "a" and "j" values into matrix */ 1667 nzcount = 0; jcount = 0; 1668 for (i=0; i<mbs; i++) { 1669 nzcountb = nzcount; 1670 nmask = 0; 1671 for (j=0; j<bs; j++) { 1672 kmax = rowlengths[i*bs+j]; 1673 for (k=0; k<kmax; k++) { 1674 tmp = jj[nzcount++]/bs; 1675 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1676 } 1677 } 1678 /* sort the masked values */ 1679 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1680 1681 /* set "j" values into matrix */ 1682 maskcount = 1; 1683 for (j=0; j<nmask; j++) { 1684 a->j[jcount++] = masked[j]; 1685 mask[masked[j]] = maskcount++; 1686 } 1687 /* set "a" values into matrix */ 1688 ishift = bs2*a->i[i]; 1689 for (j=0; j<bs; j++) { 1690 kmax = rowlengths[i*bs+j]; 1691 for (k=0; k<kmax; k++) { 1692 tmp = jj[nzcountb]/bs ; 1693 block = mask[tmp] - 1; 1694 point = jj[nzcountb] - bs*tmp; 1695 idx = ishift + bs2*block + j + bs*point; 1696 a->a[idx] = (MatScalar)aa[nzcountb++]; 1697 } 1698 } 1699 /* zero out the mask elements we set */ 1700 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1701 } 1702 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1703 1704 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1705 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1706 ierr = PetscFree(aa);CHKERRQ(ierr); 1707 ierr = PetscFree(jj);CHKERRQ(ierr); 1708 ierr = PetscFree(mask);CHKERRQ(ierr); 1709 1710 B->assembled = PETSC_TRUE; 1711 1712 ierr = MatView_Private(B);CHKERRQ(ierr); 1713 PetscFunctionReturn(0); 1714 } 1715 EXTERN_C_END 1716 1717 #undef __FUNCT__ 1718 #define __FUNCT__ "MatCreateSeqBAIJ" 1719 /*@C 1720 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1721 compressed row) format. For good matrix assembly performance the 1722 user should preallocate the matrix storage by setting the parameter nz 1723 (or the array nnz). By setting these parameters accurately, performance 1724 during matrix assembly can be increased by more than a factor of 50. 1725 1726 Collective on MPI_Comm 1727 1728 Input Parameters: 1729 + comm - MPI communicator, set to PETSC_COMM_SELF 1730 . bs - size of block 1731 . m - number of rows 1732 . n - number of columns 1733 . nz - number of nonzero blocks per block row (same for all rows) 1734 - nnz - array containing the number of nonzero blocks in the various block rows 1735 (possibly different for each block row) or PETSC_NULL 1736 1737 Output Parameter: 1738 . A - the matrix 1739 1740 Options Database Keys: 1741 . -mat_no_unroll - uses code that does not unroll the loops in the 1742 block calculations (much slower) 1743 . -mat_block_size - size of the blocks to use 1744 1745 Level: intermediate 1746 1747 Notes: 1748 A nonzero block is any block that as 1 or more nonzeros in it 1749 1750 The block AIJ format is fully compatible with standard Fortran 77 1751 storage. That is, the stored row and column indices can begin at 1752 either one (as in Fortran) or zero. See the users' manual for details. 1753 1754 Specify the preallocated storage with either nz or nnz (not both). 1755 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1756 allocation. For additional details, see the users manual chapter on 1757 matrices. 1758 1759 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1760 @*/ 1761 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1762 { 1763 int ierr; 1764 1765 PetscFunctionBegin; 1766 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1767 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 1768 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1769 PetscFunctionReturn(0); 1770 } 1771 1772 #undef __FUNCT__ 1773 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 1774 /*@C 1775 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 1776 per row in the matrix. For good matrix assembly performance the 1777 user should preallocate the matrix storage by setting the parameter nz 1778 (or the array nnz). By setting these parameters accurately, performance 1779 during matrix assembly can be increased by more than a factor of 50. 1780 1781 Collective on MPI_Comm 1782 1783 Input Parameters: 1784 + A - the matrix 1785 . bs - size of block 1786 . nz - number of block nonzeros per block row (same for all rows) 1787 - nnz - array containing the number of block nonzeros in the various block rows 1788 (possibly different for each block row) or PETSC_NULL 1789 1790 Options Database Keys: 1791 . -mat_no_unroll - uses code that does not unroll the loops in the 1792 block calculations (much slower) 1793 . -mat_block_size - size of the blocks to use 1794 1795 Level: intermediate 1796 1797 Notes: 1798 The block AIJ format is fully compatible with standard Fortran 77 1799 storage. That is, the stored row and column indices can begin at 1800 either one (as in Fortran) or zero. See the users' manual for details. 1801 1802 Specify the preallocated storage with either nz or nnz (not both). 1803 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1804 allocation. For additional details, see the users manual chapter on 1805 matrices. 1806 1807 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1808 @*/ 1809 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1810 { 1811 Mat_SeqBAIJ *b; 1812 int i,len,ierr,mbs,nbs,bs2,newbs = bs; 1813 PetscTruth flg; 1814 1815 PetscFunctionBegin; 1816 ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr); 1817 if (!flg) PetscFunctionReturn(0); 1818 1819 B->preallocated = PETSC_TRUE; 1820 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 1821 if (nnz && newbs != bs) { 1822 SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 1823 } 1824 bs = newbs; 1825 1826 mbs = B->m/bs; 1827 nbs = B->n/bs; 1828 bs2 = bs*bs; 1829 1830 if (mbs*bs!=B->m || nbs*bs!=B->n) { 1831 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 1832 } 1833 1834 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1835 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1836 if (nnz) { 1837 for (i=0; i<mbs; i++) { 1838 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1839 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 1840 } 1841 } 1842 1843 b = (Mat_SeqBAIJ*)B->data; 1844 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1845 B->ops->solve = MatSolve_SeqBAIJ_Update; 1846 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 1847 if (!flg) { 1848 switch (bs) { 1849 case 1: 1850 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1851 B->ops->mult = MatMult_SeqBAIJ_1; 1852 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1853 break; 1854 case 2: 1855 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1856 B->ops->mult = MatMult_SeqBAIJ_2; 1857 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 1858 break; 1859 case 3: 1860 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1861 B->ops->mult = MatMult_SeqBAIJ_3; 1862 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 1863 break; 1864 case 4: 1865 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1866 B->ops->mult = MatMult_SeqBAIJ_4; 1867 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 1868 break; 1869 case 5: 1870 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1871 B->ops->mult = MatMult_SeqBAIJ_5; 1872 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 1873 break; 1874 case 6: 1875 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 1876 B->ops->mult = MatMult_SeqBAIJ_6; 1877 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 1878 break; 1879 case 7: 1880 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 1881 B->ops->mult = MatMult_SeqBAIJ_7; 1882 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 1883 break; 1884 default: 1885 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 1886 B->ops->mult = MatMult_SeqBAIJ_N; 1887 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 1888 break; 1889 } 1890 } 1891 b->bs = bs; 1892 b->mbs = mbs; 1893 b->nbs = nbs; 1894 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1895 if (!nnz) { 1896 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1897 else if (nz <= 0) nz = 1; 1898 for (i=0; i<mbs; i++) b->imax[i] = nz; 1899 nz = nz*mbs; 1900 } else { 1901 nz = 0; 1902 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1903 } 1904 1905 /* allocate the matrix space */ 1906 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1907 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1908 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1909 b->j = (int*)(b->a + nz*bs2); 1910 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 1911 b->i = b->j + nz; 1912 b->singlemalloc = PETSC_TRUE; 1913 1914 b->i[0] = 0; 1915 for (i=1; i<mbs+1; i++) { 1916 b->i[i] = b->i[i-1] + b->imax[i-1]; 1917 } 1918 1919 /* b->ilen will count nonzeros in each block row so far. */ 1920 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1921 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1922 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1923 1924 b->bs = bs; 1925 b->bs2 = bs2; 1926 b->mbs = mbs; 1927 b->nz = 0; 1928 b->maxnz = nz*bs2; 1929 B->info.nz_unneeded = (PetscReal)b->maxnz; 1930 PetscFunctionReturn(0); 1931 } 1932 1933