Skip to contents
library(twbparser)

# Locate the demo workbook that ships under extdata/ of the installed package.
# The path is checked for being non-empty and existing before it is used.
twb_path <- system.file(
  "extdata",
  "test_for_wenjie.twb",
  package = "twbparser"
)

if (!nzchar(twb_path) || !file.exists(twb_path)) {
  # No demo workbook available: tell the reader how to enable the examples.
  cat("> Demo .twb not found in installed package. Skipping executable example.\n")
  cat("  To enable examples, add `inst/extdata/test_for_wenjie.twb` to the package.\n")
} else {
  # Build the parser object that the remaining chunks operate on.
  parser <- TwbParser$new(twb_path)
}
#> TWB loaded: test_for_wenjie.twb
#> TWB parsed and ready

Introduction

The twbparser package allows you to parse Tableau .twb and .twbx workbook files and extract rich metadata such as datasources, relationships, joins, fields, calculated fields, and TWBX assets. This vignette demonstrates common use cases.

Parsing a Tableau Workbook

# Print an overview of the parsed workbook: file name plus counts of
# datasources, parameters, relationships, fields, and inferred joins.
parser$summary()
#> TWB PARSER SUMMARY
#> ---------------------
#> File: test_for_wenjie.twb
#> Datasources: 2
#> Parameters:  0
#> Relationships: 1
#> Calculated fields: 1
#> Raw fields: 55
#> Inferred joins: 0

Extracting Datasources and Parameters

# Pull the datasource and parameter tables out of the parsed workbook.
datasources <- parser$get_datasources()
parameters <- parser$get_parameters()

# head() keeps the printed preview to the first few rows.
print(head(datasources))
#> # A tibble: 2 × 10
#>   datasource     primary_table connection_id connection_caption connection_class
#>   <chr>          <chr>         <chr>         <chr>              <chr>           
#> 1 Municipal_Bou… [Municipal_B… ogrdirect.07… Municipal_Boundar… ogrdirect       
#> 2 Sheet1         [Sheet1$]     excel-direct… test_county        excel-direct    
#> # ℹ 5 more variables: connection_target <chr>, datasource_name <chr>,
#> #   field_count <int>, connection_type <chr>, location <chr>
# Preview the parameters table; the demo workbook reports zero parameters,
# so an empty table is expected here.
print(head(parameters))
#> # A tibble: 0 × 5
#> # ℹ 5 variables: datasource_name <chr>, primary_table <chr>, field_count <int>,
#> #   connection_type <chr>, location <chr>

Working with Relationships and Joins

# Collect the four relationship-related tables exposed by the parser.
relations <- parser$get_relations()
joins <- parser$get_joins()
relationships <- parser$get_relationships()
inferred_relationships <- parser$get_inferred_relationships()

# Each table is printed under its own label so the previews are easy to
# tell apart in the output.
cat("Legacy relations:\n")
#> Legacy relations:
print(head(relations))
#> # A tibble: 4 × 6
#>   name                                   table connection type  join  custom_sql
#>   <chr>                                  <chr> <chr>      <chr> <chr> <chr>     
#> 1 Sheet1                                 [She… excel-dir… table NA    ""        
#> 2 Municipal_Boundaries_of_NJ             [Mun… ogrdirect… table NA    ""        
#> 3 Sheet1_F3CB2A87000C42DCA10AF147C27ADE… [Ext… NA         table NA    ""        
#> 4 Municipal_Boundaries_of_NJ_2A19790ECA… [Ext… NA         table NA    ""

# Preview the join clauses (join type, left/right table and field, operator).
cat("Join clauses:\n")
#> Join clauses:
print(head(joins))
#> # A tibble: 0 × 6
#> # ℹ 6 variables: join_type <chr>, left_table <chr>, left_field <chr>,
#> #   operator <chr>, right_table <chr>, right_field <chr>

# Preview the table returned by get_relationships().
cat("Modern relationships:\n")
#> Modern relationships:
print(head(relationships))
#> # A tibble: 1 × 8
#>   relationship_type left_table right_table       left_field operator right_field
#>   <chr>             <chr>      <chr>             <chr>      <chr>    <chr>      
#> 1 Relationship      Sheet1     Municipal_Bounda… County     =        COUNTY     
#> # ℹ 2 more variables: left_is_calc <lgl>, right_is_calc <lgl>

# Preview the table returned by get_inferred_relationships().
cat("Inferred relationships:\n")
#> Inferred relationships:
print(head(inferred_relationships))
#> # A tibble: 0 × 5
#> # ℹ 5 variables: left_table <chr>, left_field <chr>, right_table <chr>,
#> #   right_field <chr>, reason <chr>

Accessing Fields and Calculated Fields

# Fetch the raw fields and the calculated fields from the parsed workbook.
# NOTE(review): `pretty = TRUE` presumably requests the formatted formula
# column (`formula_pretty` in the printed output) — confirm against the
# get_calculated_fields() documentation.
fields <- parser$get_fields()
calculated_fields <- parser$get_calculated_fields(pretty = TRUE)

# Preview only the first rows of the raw fields table.
cat("Sample raw fields:\n")
#> Sample raw fields:
print(head(fields))
#> # A tibble: 6 × 10
#>   datasource        name  caption datatype role  semantic_role table table_clean
#>   <chr>             <chr> <chr>   <chr>    <chr> <chr>         <chr> <lgl>      
#> 1 federated.0grgao… OBJE… NA      integer  NA    NA            NA    NA         
#> 2 federated.0grgao… MUN   NA      string   NA    NA            NA    NA         
#> 3 federated.0grgao… COUN… NA      string   NA    NA            NA    NA         
#> 4 federated.0grgao… MUN_… NA      string   NA    NA            NA    NA         
#> 5 federated.0grgao… MUN_… NA      string   NA    NA            NA    NA         
#> 6 federated.0grgao… NAME  NA      string   NA    NA            NA    NA         
#> # ℹ 2 more variables: field_clean <chr>, is_parameter <lgl>

# Preview the calculated fields table fetched with pretty = TRUE.
cat("Sample calculated fields:\n")
#> Sample calculated fields:
print(head(calculated_fields))
#> # A tibble: 1 × 9
#>   datasource        name  datatype role  is_table_calc calc_class formula_pretty
#>   <chr>             <chr> <chr>    <chr> <lgl>         <chr>      <chr>         
#> 1 federated.0grgao… no d… string   dime… FALSE         tableau    "if ISNULL([c…
#> # ℹ 2 more variables: tableau_internal_name <chr>, table_clean <chr>

Working with TWBX Files (if applicable)

# Print what the parser reports for a packaged workbook: its manifest,
# its extract files, and its images.
# NOTE(review): no output is recorded for this chunk, and the workbook loaded
# earlier in this vignette is a plain .twb — confirm what these accessors
# return for non-.twbx input before enabling evaluation.
cat("TWBX manifest contents:\n")
print(parser$get_twbx_manifest())

cat("Listing TWBX extract files:\n")
print(parser$get_twbx_extracts())

cat("Listing TWBX images:\n")
print(parser$get_twbx_images())

# The example below is left commented out, so nothing is extracted when the
# chunk runs; uncomment it to try the extraction.
# Example: Extract all image files to temporary directory
# temp_images_dir <- tempdir()
# parser$extract_twbx_assets(types = "image", exdir = temp_images_dir)
# cat("Extracted TWBX images to:", temp_images_dir, "\n")

Validation of Relationships

# Run the parser's validation and report the outcome: `validation$ok` picks
# the branch, and `validation$issues` is printed when validation fails.
validation <- parser$validate()
if (!validation$ok) {
  cat("Validation issues found:\n")
  print(validation$issues)
} else {
  cat("Relationships validated successfully.\n")
}
#> Relationships validated successfully.

Summary

This vignette gave an overview of how to use the twbparser package to inspect Tableau workbook internals and extract their metadata in support of analysis, replication, or integration workflows.