Python Integration | DataMystic Developer
Python Integration with TextPipe COM
Automate TextPipe data transformations from Python using the pywin32 library. These examples demonstrate COM object instantiation, filter execution, output verification with hash comparison, batch processing, and a context manager pattern for clean resource management.
- TextPipe Pro edition installed and registered
- Python 3.8+ with pywin32 installed: pip install pywin32
- Windows operating system (COM automation is Windows-only)
Basic Usage
The simplest Python COM automation: create the TextPipe COM object, load a filter, process a file, and release resources.
"""Basic TextPipe COM automation using pywin32."""
import win32com.client
import pythoncom
import sys
def main():
    """Run one TextPipe filter over a single CSV export through COM.

    The process exits with status 1 when the filter cannot be loaded or when
    any other exception is raised while driving TextPipe.
    """
    filter_path = r"C:\Filters\cleanup_csv.fll"
    source_path = r"C:\Data\raw_export.csv"
    target_path = r"C:\Data\cleaned_export.csv"

    # COM has to be initialised on this thread before Dispatch is called.
    pythoncom.CoInitialize()
    app = None
    try:
        app = win32com.client.Dispatch("TextPipe.Application")

        # Settings for unattended operation.
        app.Silent = True
        app.Visible = False
        app.OverwriteOutput = True

        # A falsy return value from LoadFilter means the filter was not loaded.
        loaded = app.LoadFilter(filter_path)
        if not loaded:
            print(f"Error loading filter: {app.GetLastError()}")
            sys.exit(1)

        app.AddInputFile(source_path)
        app.OutputFile = target_path

        processed = app.Go()
        print(f"Successfully processed {processed} file(s).")
    except Exception as exc:
        print(f"TextPipe automation failed: {exc}")
        sys.exit(1)
    finally:
        # Shut TextPipe down and drop the reference before leaving COM.
        if app:
            app.Quit()
        del app
        pythoncom.CoUninitialize()


if __name__ == "__main__":
    main()
Output Verification with Hash Comparison
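Verify transformation output by checking that the output file exists and is non-empty, comparing its size with the input, and computing SHA-256 hashes of the input and output files. This is useful for validating that a transformation produced plausible output and for detecting when the output is byte-for-byte identical to the input (that is, the filter changed nothing).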
"""TextPipe COM automation with output verification and hash comparison."""
import win32com.client
import pythoncom
import hashlib
import os
import sys
from pathlib import Path
def compute_file_hash(filepath: str, algorithm: str = "sha256") -> str:
    """Return the hexadecimal digest of a file's contents.

    Args:
        filepath: Path of the file to hash.
        algorithm: Any algorithm name accepted by ``hashlib.new``.

    Returns:
        The digest as a hex string.
    """
    digest = hashlib.new(algorithm)
    with open(filepath, "rb") as stream:
        # Read in 8 KiB pieces so large files are never held in memory whole.
        while True:
            block = stream.read(8192)
            if not block:
                break
            digest.update(block)
    return digest.hexdigest()
def verify_output(input_file: str, output_file: str) -> dict:
    """Run basic sanity checks on a transformation's output file.

    Checks run in order and stop at the first hard failure (missing or empty
    output), leaving the remaining keys at their defaults.

    Returns:
        A dict with the keys ``exists``, ``non_empty``, ``size_ratio_ok``,
        ``input_hash``, ``output_hash`` and ``files_differ``.
    """
    report = {
        "exists": False,
        "non_empty": False,
        "size_ratio_ok": True,
        "input_hash": None,
        "output_hash": None,
        "files_differ": True,
    }

    if not os.path.exists(output_file):
        return report
    report["exists"] = True

    out_bytes = os.stat(output_file).st_size
    if not out_bytes:
        return report
    report["non_empty"] = True

    # The ratio check is skipped for an empty input to avoid dividing by zero;
    # otherwise the output must be at least 10% of the input size.
    in_bytes = os.stat(input_file).st_size
    if in_bytes > 0:
        report["size_ratio_ok"] = out_bytes / in_bytes >= 0.1

    # Equal hashes mean the output is identical to the input.
    source_digest = compute_file_hash(input_file)
    result_digest = compute_file_hash(output_file)
    report.update(
        input_hash=source_digest,
        output_hash=result_digest,
        files_differ=source_digest != result_digest,
    )
    return report
def transform_and_verify(filter_path: str, input_file: str, output_file: str):
    """Apply a TextPipe filter to one file and sanity-check the result.

    Args:
        filter_path: Filter file to load.
        input_file: File to transform.
        output_file: Destination path for the transformed data.

    Returns:
        The report dict produced by ``verify_output``.

    Raises:
        RuntimeError: If the filter fails to load, no files are processed,
            or the output file is missing or empty.
    """
    pythoncom.CoInitialize()
    app = None
    try:
        app = win32com.client.Dispatch("TextPipe.Application")
        app.Silent = True
        app.Visible = False
        app.OverwriteOutput = True

        loaded = app.LoadFilter(filter_path)
        if not loaded:
            raise RuntimeError(f"Filter load failed: {app.GetLastError()}")

        app.AddInputFile(input_file)
        app.OutputFile = output_file
        processed = app.Go()
        if processed == 0:
            raise RuntimeError(f"No files processed: {app.GetLastError()}")

        report = verify_output(input_file, output_file)
        if not report["exists"]:
            raise RuntimeError("Output file was not created")
        if not report["non_empty"]:
            raise RuntimeError("Output file is empty")
        if not report["size_ratio_ok"]:
            # A small output is suspicious but not fatal.
            print("WARNING: Output is less than 10% of input size")

        print("Transformation verified successfully:")
        input_prefix = report["input_hash"][:16]
        print(f" Input hash: {input_prefix}...")
        output_prefix = report["output_hash"][:16]
        print(f" Output hash: {output_prefix}...")
        differ = report["files_differ"]
        print(f" Files differ: {differ}")
        return report
    finally:
        # Shut TextPipe down and drop the reference before leaving COM.
        if app:
            app.Quit()
        del app
        pythoncom.CoUninitialize()


if __name__ == "__main__":
    result = transform_and_verify(
        filter_path=r"C:\Filters\ebcdic_to_ascii.fll",
        input_file=r"C:\Data\mainframe_extract.dat",
        output_file=r"C:\Data\converted_output.csv",
    )
Batch Processing
Process multiple folders or file lists in a single COM session. Reusing one COM instance across many transformations is more efficient than creating a new instance per file.
"""Batch processing multiple folders with TextPipe COM."""
import win32com.client
import pythoncom
import os
from datetime import datetime
def batch_process(
    filter_path: str,
    folders: list,
    file_mask: str = "*.csv",
    log_dir: str = r"C:\Logs",
):
    """Process multiple folders using a single TextPipe COM session.

    The filter is loaded once and reused for every folder. Each folder's
    output is written to a ``Processed`` subfolder inside that folder.

    Args:
        filter_path: Filter file to load.
        folders: Folder paths to process, in order.
        file_mask: File mask passed to ``AddInputFolder``.
        log_dir: Directory that receives the timestamped TextPipe log file.
            It is created if it does not exist.

    Returns:
        A list with one dict per folder, holding the keys ``folder``,
        ``files_processed`` and ``output_dir``.

    Raises:
        RuntimeError: If the filter cannot be loaded.
    """
    pythoncom.CoInitialize()
    tp = None
    results = []
    try:
        # Create the log directory up front so logging does not depend on how
        # TextPipe handles a missing folder.
        os.makedirs(log_dir, exist_ok=True)

        tp = win32com.client.Dispatch("TextPipe.Application")
        tp.Silent = True
        tp.Visible = False  # keep the UI hidden for unattended batch runs
        tp.OverwriteOutput = True

        # Set up logging
        log_name = f"textpipe_batch_{datetime.now():%Y%m%d_%H%M%S}.log"
        tp.LogFile = os.path.join(log_dir, log_name)

        # Load filter once for all folders
        if not tp.LoadFilter(filter_path):
            raise RuntimeError(f"Filter load failed: {tp.GetLastError()}")

        for folder in folders:
            # Clear previous inputs so each Go() only sees this folder.
            tp.ClearInputFiles()

            # NOTE(review): if the third argument enables recursion, files in
            # the "Processed" subfolder created below would be picked up as
            # input on a re-run - confirm against the TextPipe API.
            tp.AddInputFolder(folder, file_mask, True)

            output_dir = os.path.join(folder, "Processed")
            os.makedirs(output_dir, exist_ok=True)
            tp.OutputFolder = output_dir

            count = tp.Go()
            results.append({
                "folder": folder,
                "files_processed": count,
                "output_dir": output_dir,
            })
            print(f" {folder}: {count} files processed")

        total = sum(r["files_processed"] for r in results)
        print(f"\nBatch complete. Total: {total} files across {len(folders)} folders.")
        return results
    finally:
        try:
            if tp:
                tp.Quit()
        finally:
            # Runs even when Quit() fails, so the COM reference is dropped
            # and CoInitialize is always balanced.
            tp = None
            pythoncom.CoUninitialize()


if __name__ == "__main__":
    folders_to_process = [
        r"C:\Data\Sales",
        r"C:\Data\Marketing",
        r"C:\Data\Finance",
    ]
    batch_process(
        filter_path=r"C:\Filters\data_standardization.fll",
        folders=folders_to_process,
        file_mask="*.csv",
    )
Context Manager Pattern
Wrap the TextPipe COM object in a Python context manager for automatic cleanup. This ensures COM resources are always released, even when exceptions occur.
"""TextPipe COM context manager for clean resource management."""
import win32com.client
import pythoncom
from contextlib import contextmanager
@contextmanager
def textpipe_session(log_file: str = None):
    """Context manager for TextPipe COM automation sessions.

    Initialises COM, creates the TextPipe object configured for unattended
    operation, and yields it. On exit the object is asked to quit and COM is
    uninitialised, even if the body or ``Quit()`` raises.

    Args:
        log_file: Optional path assigned to the object's ``LogFile``
            property. ``None`` or an empty string leaves it unset.

    Usage:
        with textpipe_session() as tp:
            if not tp.LoadFilter(r"C:\\Filters\\my_filter.fll"):
                raise RuntimeError(tp.GetLastError())
            tp.AddInputFile(r"C:\\Data\\input.csv")
            tp.OutputFile = r"C:\\Data\\output.csv"
            tp.Go()
    """
    pythoncom.CoInitialize()
    tp = None
    try:
        tp = win32com.client.Dispatch("TextPipe.Application")
        tp.Silent = True
        tp.Visible = False
        tp.OverwriteOutput = True
        if log_file:
            tp.LogFile = log_file
        yield tp
    finally:
        try:
            if tp:
                tp.Quit()
        finally:
            # Runs even when Quit() fails (for example, the server already
            # went away): drop the COM reference, then balance CoInitialize.
            tp = None
            pythoncom.CoUninitialize()
# --- Usage Examples ---
def example_single_file():
    """Transform a single file using the context manager.

    Raises:
        RuntimeError: If the filter cannot be loaded.
    """
    with textpipe_session() as tp:
        # LoadFilter signals failure through a falsy return value, not an
        # exception. Without this check Go() would run without the filter.
        if not tp.LoadFilter(r"C:\Filters\normalize_dates.fll"):
            raise RuntimeError(f"Filter load failed: {tp.GetLastError()}")
        tp.AddInputFile(r"C:\Data\report.csv")
        tp.OutputFile = r"C:\Data\report_normalized.csv"
        count = tp.Go()
        print(f"Processed {count} file(s)")
def example_multi_step_pipeline():
    """Run a multi-step transformation pipeline.

    Each step reads the previous step's output, so the pipeline stops at the
    first step that fails instead of feeding stale or missing files forward.

    Raises:
        RuntimeError: If a step's filter cannot be loaded or a step
            processes no files.
    """
    steps = [
        (r"C:\Filters\step1_clean.fll", r"C:\Data\raw.csv", r"C:\Temp\step1.csv"),
        (r"C:\Filters\step2_transform.fll", r"C:\Temp\step1.csv", r"C:\Temp\step2.csv"),
        (r"C:\Filters\step3_validate.fll", r"C:\Temp\step2.csv", r"C:\Data\final.csv"),
    ]
    with textpipe_session(log_file=r"C:\Logs\pipeline.log") as tp:
        for i, (filter_path, input_file, output_file) in enumerate(steps, 1):
            tp.ClearInputFiles()
            # If the load result were ignored, the previous step's filter
            # could still be the active one when Go() runs.
            if not tp.LoadFilter(filter_path):
                raise RuntimeError(
                    f"Step {i}: filter load failed: {tp.GetLastError()}"
                )
            tp.AddInputFile(input_file)
            tp.OutputFile = output_file
            count = tp.Go()
            if count == 0:
                # The next step depends on this step's output file.
                raise RuntimeError(
                    f"Step {i}: no files processed: {tp.GetLastError()}"
                )
            print(f"Step {i}: {count} file(s) processed -> {output_file}")


if __name__ == "__main__":
    example_single_file()
    example_multi_step_pipeline()
Complete Production Example
A production-ready script combining the patterns above: argument parsing, logging, single-file or folder (batch) processing, input validation, and error reporting with distinct exit codes.
"""
Production TextPipe automation script.
Usage: python textpipe_transform.py --filter path.fll --input dir --output dir
"""
import argparse
import logging
import os
import sys
from datetime import datetime
from pathlib import Path
import win32com.client
import pythoncom
# Send records to a dated log file (relative path, so it lands in the current
# working directory) and to a stream handler.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
    handlers=[
        logging.FileHandler(f"textpipe_{datetime.now():%Y%m%d}.log"),
        logging.StreamHandler(),
    ],
)

# Module-level logger shared by the rest of the script.
logger = logging.getLogger(__name__)
class TextPipeError(Exception):
    """Raised when a TextPipe automation step fails."""
class TextPipeAutomation:
    """Production TextPipe COM automation wrapper.

    Use it as a context manager. Entering initialises COM and creates the
    TextPipe object; leaving quits TextPipe and uninitialises COM::

        with TextPipeAutomation() as automation:
            automation.transform(filter_path, input_path, output_path)
    """

    def __init__(self):
        self._tp = None
        # COM is initialised in __enter__ rather than here, so every
        # CoInitialize is paired with exactly one CoUninitialize - including
        # when the object is never entered or __enter__ fails.
        self._com_initialized = False

    def __enter__(self):
        pythoncom.CoInitialize()
        self._com_initialized = True
        try:
            try:
                self._tp = win32com.client.Dispatch("TextPipe.Application")
            except Exception as e:
                raise TextPipeError(
                    "Cannot create TextPipe COM object. "
                    "Ensure TextPipe Pro is installed and registered."
                ) from e
            self._tp.Silent = True
            self._tp.Visible = False
            self._tp.OverwriteOutput = True
        except BaseException:
            # __exit__ is not called when __enter__ raises, so release here.
            self._release()
            raise
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self._release()
        return False

    def _release(self):
        """Quit TextPipe (if created) and uninitialise COM exactly once."""
        # Reset to None instead of deleting the attribute, so later calls
        # fail with a clear TextPipeError rather than an AttributeError.
        tp, self._tp = self._tp, None
        try:
            if tp:
                tp.Quit()
        finally:
            # Drop the COM reference before tearing COM down, even if
            # Quit() raised.
            del tp
            if self._com_initialized:
                self._com_initialized = False
                pythoncom.CoUninitialize()

    def transform(self, filter_path: str, input_path: str, output_path: str) -> int:
        """Execute a transformation and return files processed count.

        Args:
            filter_path: Existing filter file to load.
            input_path: A directory (all files matching ``*.*`` are added)
                or a single input file.
            output_path: Output folder when ``input_path`` is a directory,
                otherwise the output file path. Missing directories are
                created.

        Returns:
            The value returned by TextPipe's ``Go()``.

        Raises:
            TextPipeError: If the session is not open, the filter or input
                file is missing, the filter fails to load, or nothing was
                processed and TextPipe reports an error.
        """
        if self._tp is None:
            raise TextPipeError(
                "TextPipe session is not open; use "
                "'with TextPipeAutomation() as automation:'"
            )
        if not os.path.exists(filter_path):
            raise TextPipeError(f"Filter file not found: {filter_path}")

        self._tp.ClearInputFiles()
        if not self._tp.LoadFilter(filter_path):
            raise TextPipeError(f"Load filter failed: {self._tp.GetLastError()}")

        if os.path.isdir(input_path):
            self._tp.AddInputFolder(input_path, "*.*", True)
            os.makedirs(output_path, exist_ok=True)
            self._tp.OutputFolder = output_path
        else:
            if not os.path.exists(input_path):
                raise TextPipeError(f"Input file not found: {input_path}")
            self._tp.AddInputFile(input_path)
            # dirname() is "" for a bare filename such as "out.csv", and
            # os.makedirs("") raises FileNotFoundError, so only create the
            # directory when the path actually has one.
            output_dir = os.path.dirname(output_path)
            if output_dir:
                os.makedirs(output_dir, exist_ok=True)
            self._tp.OutputFile = output_path

        count = self._tp.Go()
        if count == 0:
            error = self._tp.GetLastError()
            if error:
                raise TextPipeError(f"Transform failed: {error}")
            logger.warning("No files were processed (input may be empty)")
        return count
def main():
    """Parse command-line arguments, run one transformation, and report it.

    Exit status: 1 when a ``TextPipeError`` is raised, 2 for any other
    exception. On success the function returns normally.
    """
    parser = argparse.ArgumentParser(description="TextPipe batch transformation")
    parser.add_argument("--filter", required=True, help="Path to .fll filter file")
    parser.add_argument("--input", required=True, help="Input file or directory")
    parser.add_argument("--output", required=True, help="Output file or directory")
    args = parser.parse_args()

    logger.info("Starting transformation: %s", args.filter)
    logger.info("Input: %s", args.input)
    logger.info("Output: %s", args.output)

    try:
        with TextPipeAutomation() as automation:
            count = automation.transform(args.filter, args.input, args.output)
            logger.info("Complete: %s file(s) processed successfully", count)
    except TextPipeError as e:
        # Expected failure: the message alone is enough.
        logger.error("TextPipe error: %s", e)
        sys.exit(1)
    except Exception as e:
        # Unexpected failure: logger.exception logs the same message at ERROR
        # level and appends the traceback, which logger.error discarded.
        logger.exception("Unexpected error: %s", e)
        sys.exit(2)


if __name__ == "__main__":
    main()
Next Steps
- TextPipe COM API Reference — Full method and property documentation
- PowerShell Integration — PowerShell COM automation examples
- CI/CD Integration — Pipeline configuration for automated transforms
- Batch Processing Examples — Command-line batch automation