Skip to main content

Python Integration with TextPipe COM

Automate TextPipe data transformations from Python using the pywin32 library. These examples demonstrate COM object instantiation, filter execution, output verification with hash comparison, batch processing, and a context manager pattern for clean resource management.

Basic Usage

The simplest Python COM automation: create the TextPipe COM object, load a filter, process a file, and release resources.

"""Basic TextPipe COM automation using pywin32."""
import win32com.client
import pythoncom
import sys

def main():
    """Run one TextPipe transformation on a single file through COM.

    Creates the ``TextPipe.Application`` COM object, loads a saved filter,
    processes one input file into one output file and prints the number of
    files reported by ``Go()``.  If the filter cannot be loaded, or any
    exception is raised along the way, a message is printed and the process
    exits with status 1.
    """
    # Initialize COM library for this thread
    pythoncom.CoInitialize()

    tp = None
    try:
        # Create TextPipe COM object
        tp = win32com.client.Dispatch("TextPipe.Application")

        # Configure for unattended operation
        tp.Silent = True
        tp.Visible = False
        tp.OverwriteOutput = True

        # Load a pre-configured filter
        if not tp.LoadFilter(r"C:\Filters\cleanup_csv.fll"):
            print(f"Error loading filter: {tp.GetLastError()}")
            sys.exit(1)

        # Add input file and set output
        tp.AddInputFile(r"C:\Data\raw_export.csv")
        tp.OutputFile = r"C:\Data\cleaned_export.csv"

        # Execute the transformation
        files_processed = tp.Go()
        print(f"Successfully processed {files_processed} file(s).")

    except Exception as e:
        # SystemExit is not an Exception subclass, so the sys.exit(1) above
        # passes through here untouched.
        print(f"TextPipe automation failed: {e}")
        sys.exit(1)

    finally:
        # Always release COM resources.  The nested try/finally guarantees
        # that CoUninitialize() balances CoInitialize() even if Quit() raises.
        try:
            if tp is not None:
                tp.Quit()
        finally:
            # Drop the COM reference before tearing COM down for this thread.
            tp = None
            pythoncom.CoUninitialize()


# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

Output Verification with Hash Comparison

Verify transformation output by checking that the output file exists and is non-empty, comparing its size against the input, and computing SHA-256 hashes of the input and output files. Useful for validating that a transformation produced the expected results, or for detecting when a transformation left the data unchanged.

"""TextPipe COM automation with output verification and hash comparison."""
import win32com.client
import pythoncom
import hashlib
import os
import sys
from pathlib import Path


def compute_file_hash(filepath: str, algorithm: str = "sha256") -> str:
    """Return the hexadecimal digest of a file's contents.

    The file is opened in binary mode and fed to the hash object in
    8192-byte blocks, so the whole file is never held in memory at once.
    ``algorithm`` is passed straight to ``hashlib.new``.
    """
    digest = hashlib.new(algorithm)
    with open(filepath, "rb") as stream:
        while True:
            block = stream.read(8192)
            if not block:
                # An empty read means end of file.
                break
            digest.update(block)
    return digest.hexdigest()


def verify_output(input_file: str, output_file: str) -> dict:
    """Run basic sanity checks on a transformation's output file.

    Returns a dict with these keys:

    * ``exists`` - the output file is present.
    * ``non_empty`` - the output file has a non-zero size.
    * ``size_ratio_ok`` - output is at least 10% of the input size
      (left ``True`` when the input is empty or the check is not reached).
    * ``input_hash`` / ``output_hash`` - SHA-256 hex digests, or ``None``
      when the checks stop before hashing.
    * ``files_differ`` - the two digests are different (defaults to ``True``).

    Checking stops at the first of "missing" or "empty" output, and the
    remaining entries keep their defaults.
    """
    report = dict(
        exists=False,
        non_empty=False,
        size_ratio_ok=True,
        input_hash=None,
        output_hash=None,
        files_differ=True,
    )

    if os.path.exists(output_file):
        report["exists"] = True

        out_bytes = os.path.getsize(output_file)
        if out_bytes != 0:
            report["non_empty"] = True

            # Output should be at least 10% of the input size; skipped for an
            # empty input to avoid dividing by zero.
            in_bytes = os.path.getsize(input_file)
            if in_bytes > 0:
                report["size_ratio_ok"] = (out_bytes / in_bytes) >= 0.1

            # Identical digests mean the output is the same as the input.
            src_digest = compute_file_hash(input_file)
            dst_digest = compute_file_hash(output_file)
            report["input_hash"] = src_digest
            report["output_hash"] = dst_digest
            report["files_differ"] = src_digest != dst_digest

    return report


def transform_and_verify(filter_path: str, input_file: str, output_file: str) -> dict:
    """Run a TextPipe transformation and verify the output.

    Args:
        filter_path: Filter file handed to ``LoadFilter``.
        input_file: File to transform.
        output_file: Destination file for the transformed data.

    Returns:
        The verification dict produced by ``verify_output``.

    Raises:
        RuntimeError: If the filter fails to load, ``Go()`` reports zero
            files processed, or the output file is missing or empty.
    """
    pythoncom.CoInitialize()
    tp = None

    try:
        tp = win32com.client.Dispatch("TextPipe.Application")
        tp.Silent = True
        tp.Visible = False
        tp.OverwriteOutput = True

        # Load filter
        if not tp.LoadFilter(filter_path):
            raise RuntimeError(f"Filter load failed: {tp.GetLastError()}")

        # Configure and execute
        tp.AddInputFile(input_file)
        tp.OutputFile = output_file
        files_processed = tp.Go()

        if files_processed == 0:
            raise RuntimeError(f"No files processed: {tp.GetLastError()}")

        # Verify output
        verification = verify_output(input_file, output_file)

        if not verification["exists"]:
            raise RuntimeError("Output file was not created")
        if not verification["non_empty"]:
            raise RuntimeError("Output file is empty")
        if not verification["size_ratio_ok"]:
            # A small output is suspicious but not fatal, so only warn.
            print("WARNING: Output is less than 10% of input size")

        print("Transformation verified successfully:")
        print(f"  Input hash:  {verification['input_hash'][:16]}...")
        print(f"  Output hash: {verification['output_hash'][:16]}...")
        print(f"  Files differ: {verification['files_differ']}")

        return verification

    finally:
        # The nested try/finally guarantees CoUninitialize() balances
        # CoInitialize() even if Quit() itself raises.
        try:
            if tp is not None:
                tp.Quit()
        finally:
            # Drop the COM reference before tearing COM down for this thread.
            tp = None
            pythoncom.CoUninitialize()


# Script entry point: run one transformation with example paths.
# The returned verification dict is kept in `result` but not used further here.
if __name__ == "__main__":
    result = transform_and_verify(
        filter_path=r"C:\Filters\ebcdic_to_ascii.fll",
        input_file=r"C:\Data\mainframe_extract.dat",
        output_file=r"C:\Data\converted_output.csv",
    )

Batch Processing

Process multiple folders or file lists in a single COM session. Reusing one COM instance across many transformations is more efficient than creating a new instance per file.

"""Batch processing multiple folders with TextPipe COM."""
import win32com.client
import pythoncom
import os
from datetime import datetime


def batch_process(filter_path: str, folders: list, file_mask: str = "*.csv") -> list:
    """Process multiple folders using a single TextPipe COM session.

    The filter is loaded once.  For each folder the input list is cleared,
    files matching ``file_mask`` are added, output is directed to a
    ``Processed`` subfolder (created if missing) and ``Go()`` is run.

    Args:
        filter_path: Filter file handed to ``LoadFilter``.
        folders: Folder paths to process, in order.
        file_mask: File pattern passed to ``AddInputFolder``.

    Returns:
        One dict per folder with the keys ``folder``, ``files_processed``
        and ``output_dir``.

    Raises:
        RuntimeError: If the filter fails to load.
    """
    pythoncom.CoInitialize()
    tp = None
    results = []

    try:
        tp = win32com.client.Dispatch("TextPipe.Application")
        # Configure for unattended operation, same as the other examples.
        tp.Silent = True
        tp.Visible = False
        tp.OverwriteOutput = True

        # Set up logging: one timestamped log file per batch run
        log_name = f"textpipe_batch_{datetime.now():%Y%m%d_%H%M%S}.log"
        tp.LogFile = os.path.join(r"C:\Logs", log_name)

        # Load filter once for all folders
        if not tp.LoadFilter(filter_path):
            raise RuntimeError(f"Filter load failed: {tp.GetLastError()}")

        for folder in folders:
            # Clear previous inputs
            tp.ClearInputFiles()

            # Add all matching files from this folder
            # NOTE(review): the third argument presumably enables subfolder
            # recursion - confirm.  If so, files already written to the
            # "Processed" subfolder would be picked up again on a later run.
            tp.AddInputFolder(folder, file_mask, True)

            # Create output subfolder
            output_dir = os.path.join(folder, "Processed")
            os.makedirs(output_dir, exist_ok=True)
            tp.OutputFolder = output_dir

            # Execute transformation
            count = tp.Go()
            results.append({
                "folder": folder,
                "files_processed": count,
                "output_dir": output_dir,
            })

            print(f"  {folder}: {count} files processed")

        total = sum(r["files_processed"] for r in results)
        print(f"\nBatch complete. Total: {total} files across {len(folders)} folders.")
        return results

    finally:
        # The nested try/finally guarantees CoUninitialize() balances
        # CoInitialize() even if Quit() itself raises.
        try:
            if tp is not None:
                tp.Quit()
        finally:
            # Drop the COM reference before tearing COM down for this thread.
            tp = None
            pythoncom.CoUninitialize()


# Script entry point: run the batch over three example folders.
if __name__ == "__main__":
    folders_to_process = [
        r"C:\Data\Sales",
        r"C:\Data\Marketing",
        r"C:\Data\Finance",
    ]

    # file_mask is passed explicitly here although it equals the default.
    batch_process(
        filter_path=r"C:\Filters\data_standardization.fll",
        folders=folders_to_process,
        file_mask="*.csv",
    )

Context Manager Pattern

Wrap the TextPipe COM object in a Python context manager for automatic cleanup. This ensures COM resources are always released, even when exceptions occur.

"""TextPipe COM context manager for clean resource management."""
import win32com.client
import pythoncom
from contextlib import contextmanager


@contextmanager
def textpipe_session(log_file: str = None):
    """Context manager for TextPipe COM automation sessions.

    Initializes COM for the current thread, creates the
    ``TextPipe.Application`` object configured for unattended operation and
    yields it.  On exit, whether normal or via an exception, the
    application is asked to quit and COM is uninitialized.

    Args:
        log_file: Optional path assigned to the object's ``LogFile``
            property; skipped when falsy.

    Yields:
        The TextPipe COM object.

    Usage:
        with textpipe_session() as tp:
            tp.LoadFilter(r"C:\\Filters\\my_filter.fll")
            tp.AddInputFile(r"C:\\Data\\input.csv")
            tp.OutputFile = r"C:\\Data\\output.csv"
            tp.Go()
    """
    pythoncom.CoInitialize()
    tp = None

    try:
        tp = win32com.client.Dispatch("TextPipe.Application")
        tp.Silent = True
        tp.Visible = False
        tp.OverwriteOutput = True

        if log_file:
            tp.LogFile = log_file

        yield tp

    finally:
        # The nested try/finally guarantees CoUninitialize() balances
        # CoInitialize() even if Quit() itself raises.
        try:
            if tp is not None:
                tp.Quit()
        finally:
            # Drop the COM reference before tearing COM down for this thread.
            tp = None
            pythoncom.CoUninitialize()


# --- Usage Examples ---

def example_single_file():
    """Transform a single file using the context manager.

    Raises:
        RuntimeError: If the filter cannot be loaded.
    """
    with textpipe_session() as tp:
        # A failed load is reported through the return value, so check it
        # instead of running Go() without the intended filter.
        if not tp.LoadFilter(r"C:\Filters\normalize_dates.fll"):
            raise RuntimeError(f"Filter load failed: {tp.GetLastError()}")

        tp.AddInputFile(r"C:\Data\report.csv")
        tp.OutputFile = r"C:\Data\report_normalized.csv"

        count = tp.Go()
        print(f"Processed {count} file(s)")


def example_multi_step_pipeline():
    """Run a multi-step transformation pipeline.

    Each step is a ``(filter, input, output)`` triple, and the output of one
    step is the input of the next.  All steps share one TextPipe session.

    Raises:
        RuntimeError: If a step's filter cannot be loaded or a step
            processes no files.  The pipeline stops there, because later
            steps depend on that step's output.
    """
    steps = [
        (r"C:\Filters\step1_clean.fll", r"C:\Data\raw.csv", r"C:\Temp\step1.csv"),
        (r"C:\Filters\step2_transform.fll", r"C:\Temp\step1.csv", r"C:\Temp\step2.csv"),
        (r"C:\Filters\step3_validate.fll", r"C:\Temp\step2.csv", r"C:\Data\final.csv"),
    ]

    with textpipe_session(log_file=r"C:\Logs\pipeline.log") as tp:
        for i, (filter_path, input_file, output_file) in enumerate(steps, 1):
            # Start every step from an empty input list.
            tp.ClearInputFiles()
            if not tp.LoadFilter(filter_path):
                raise RuntimeError(
                    f"Step {i}: filter load failed: {tp.GetLastError()}"
                )
            tp.AddInputFile(input_file)
            tp.OutputFile = output_file

            count = tp.Go()
            if count == 0:
                # Continuing would feed a missing or stale intermediate file
                # into the next step.
                raise RuntimeError(
                    f"Step {i}: no files processed: {tp.GetLastError()}"
                )
            print(f"Step {i}: {count} file(s) processed -> {output_file}")


# Script entry point: run both usage examples, each in its own session.
if __name__ == "__main__":
    example_single_file()
    example_multi_step_pipeline()

Complete Production Example

A production-ready script combining the patterns above: argument parsing, logging, single-file or whole-folder (batch) processing, input validation, and proper error reporting.

"""
Production TextPipe automation script.
Usage: python textpipe_transform.py --filter path.fll --input dir --output dir
"""
import argparse
import logging
import os
import sys
from datetime import datetime
from pathlib import Path

import win32com.client
import pythoncom


# Configure the root logger at import time: INFO and above go both to a
# per-day log file (relative name, so it is created in the current working
# directory) and to a stream handler for console output.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        logging.FileHandler(f"textpipe_{datetime.now():%Y%m%d}.log"),
        logging.StreamHandler(),
    ],
)
# Module-level logger used by the rest of this script.
logger = logging.getLogger(__name__)


class TextPipeError(Exception):
    """Raised when a TextPipe automation step fails."""


class TextPipeAutomation:
    """Production TextPipe COM automation wrapper.

    Intended to be used as a context manager::

        with TextPipeAutomation() as automation:
            automation.transform(filter_path, input_path, output_path)

    COM is initialized for the current thread when the instance is created
    and released again when the ``with`` block exits, or immediately if the
    TextPipe COM object cannot be created or configured.
    """

    def __init__(self):
        self._tp = None
        pythoncom.CoInitialize()
        # True while this instance still owes a matching CoUninitialize().
        self._com_initialized = True

    def __enter__(self):
        """Create and configure the TextPipe COM object.

        Raises:
            TextPipeError: If the COM object cannot be created.
        """
        if not self._com_initialized:
            # Re-entering after a previous exit: COM was released, so set it
            # up again before dispatching.
            pythoncom.CoInitialize()
            self._com_initialized = True

        try:
            self._tp = win32com.client.Dispatch("TextPipe.Application")
        except Exception as e:
            # __exit__ is not called when __enter__ raises, so the
            # CoInitialize() from __init__ must be balanced here.
            self._release()
            raise TextPipeError(
                "Cannot create TextPipe COM object. "
                "Ensure TextPipe Pro is installed and registered."
            ) from e

        try:
            # Configure for unattended operation.
            self._tp.Silent = True
            self._tp.Visible = False
            self._tp.OverwriteOutput = True
        except Exception:
            # Same reasoning as above: clean up, then let the error surface.
            self._release()
            raise
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Quit TextPipe and release COM; never suppresses exceptions."""
        self._release()
        return False

    def _release(self):
        """Quit the COM object (if any) and uninitialize COM exactly once."""
        # Reset the attribute to None rather than deleting it, so the
        # instance stays in a consistent state after the session ends.
        tp, self._tp = self._tp, None
        try:
            if tp is not None:
                tp.Quit()
        finally:
            # Drop the COM reference before tearing COM down, and make sure
            # CoUninitialize() runs even if Quit() raised.
            tp = None
            if self._com_initialized:
                self._com_initialized = False
                pythoncom.CoUninitialize()

    def transform(self, filter_path: str, input_path: str, output_path: str) -> int:
        """Execute a transformation and return files processed count.

        Args:
            filter_path: Existing filter file to load.
            input_path: A directory (all files matching ``*.*`` are added)
                or a single existing file.
            output_path: Output directory when ``input_path`` is a
                directory, otherwise the output file path.  Missing
                directories are created.

        Returns:
            The count returned by ``Go()``.

        Raises:
            TextPipeError: If the filter or input file does not exist, the
                session is not open, the filter fails to load, or ``Go()``
                returns 0 and an error message is available.
        """
        if not os.path.exists(filter_path):
            raise TextPipeError(f"Filter file not found: {filter_path}")

        if self._tp is None:
            raise TextPipeError(
                "TextPipe session is not open; use 'with TextPipeAutomation()'"
            )

        self._tp.ClearInputFiles()

        if not self._tp.LoadFilter(filter_path):
            raise TextPipeError(f"Load filter failed: {self._tp.GetLastError()}")

        if os.path.isdir(input_path):
            self._tp.AddInputFolder(input_path, "*.*", True)
            os.makedirs(output_path, exist_ok=True)
            self._tp.OutputFolder = output_path
        else:
            if not os.path.exists(input_path):
                raise TextPipeError(f"Input file not found: {input_path}")
            self._tp.AddInputFile(input_path)
            # A bare filename has an empty dirname, and os.makedirs("")
            # raises, so only create the directory when one is named.
            output_dir = os.path.dirname(output_path)
            if output_dir:
                os.makedirs(output_dir, exist_ok=True)
            self._tp.OutputFile = output_path

        count = self._tp.Go()
        if count == 0:
            error = self._tp.GetLastError()
            if error:
                raise TextPipeError(f"Transform failed: {error}")
            # Zero files without an error message is treated as a warning.
            logger.warning("No files were processed (input may be empty)")

        return count


def main():
    """Parse command-line arguments and run one TextPipe transformation.

    Exit status: 1 when a ``TextPipeError`` is raised, 2 for any other
    exception.  On success the function returns normally.
    """
    parser = argparse.ArgumentParser(description="TextPipe batch transformation")
    parser.add_argument("--filter", required=True, help="Path to .fll filter file")
    parser.add_argument("--input", required=True, help="Input file or directory")
    parser.add_argument("--output", required=True, help="Output file or directory")
    args = parser.parse_args()

    logger.info("Starting transformation: %s", args.filter)
    logger.info("Input: %s", args.input)
    logger.info("Output: %s", args.output)

    try:
        with TextPipeAutomation() as automation:
            count = automation.transform(args.filter, args.input, args.output)
            logger.info("Complete: %s file(s) processed successfully", count)

    except TextPipeError as e:
        # Expected failure mode: the message alone is enough.
        logger.error("TextPipe error: %s", e)
        sys.exit(1)
    except Exception as e:
        # Unexpected failure: logger.exception keeps the same message but
        # also records the traceback, which is needed to diagnose it.
        logger.exception("Unexpected error: %s", e)
        sys.exit(2)


# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()

Next Steps