Extracting Specific Details

Get help with installation and running here.

Moderators: DataMystic Support, Moderators

xyber
Posts: 17
Joined: Sat Feb 20, 2010 5:34 am

Extracting Specific Details

Postby xyber » Sat Feb 20, 2010 5:59 am

Hello There,

I have been tasked with tabulating data from my workplace's old intranet site, which comprises a huge listing of HTML pages. I am wondering whether DataPipe is able to do the following tasks:
- Extract specific tags from the HTML
- Write them to a CSV file
I would also like to know whether DataPipe can extract multiple pieces of information from the HTML pages by pattern and write them to a CSV/Excel file. Please let me know if it's possible to do so and how much a license would cost. Thank you.

Regards.

User avatar
DataMystic Support
Site Admin
Posts: 2136
Joined: Mon Jun 30, 2003 12:32 pm
Location: Melbourne, Australia
Contact:

Re: Extracting Specific Details

Postby DataMystic Support » Mon Feb 22, 2010 7:20 am

You need TextPipe (http://www.datamystic.com/textpipe.html) for this, not DataPipe.

Please see the docs on web site data mining - http://www.datamystic.com/docs
Regards,

Simon Carter, http://DataMystic.com/forums/index.php
http://PredictBGL.com - Insulin dose calculator for Type 1 diabetes
http://DownloadPipe.com - 250,000 free software downloads
http://DetachPipe.com - send huge email attachments

xyber
Posts: 17
Joined: Sat Feb 20, 2010 5:34 am

Re: Extracting Specific Details

Postby xyber » Wed Feb 24, 2010 4:20 am

Hello,

I've followed your link and found TextPipe. However, I'm still not clear on how to use the patterns. Let's say, for example, I have a page that looks like this:

Code: Select all

<title>BIC PRODUCT (SINGAPORE) PTE LTD</title>
<meta name="keywords" content="BIC PRODUCT (SINGAPORE) PTE LTD">
<meta name="description" content="BIC PRODUCT (SINGAPORE) PTE LTD | Classified Section-->STATIONERS  | BIC has built its success on a clear vision: to make top-quality, affordable
BIC® products available to everyone.">
<style type="text/css">
<!--
body {
   background-image: url(default.htm);
   margin-left: 0px;
   margin-top: 0px;
}
-->
</style>
<link href="TD.CSS" rel="stylesheet" type="text/css">
<style type="text/css">
<!--
a:link {
   color: #FF6600;
   text-decoration: none;
   font-family: Verdana, Arial, Helvetica, sans-serif;
   font-size: 9px;
}
a:visited {
   color: #FF3300;
   text-decoration: none;
   font-family: Verdana, Arial, Helvetica, sans-serif;
   font-size: 9px;
}
a:hover {
   color: #CC0000;
   text-decoration: none;
   font-family: Verdana, Arial, Helvetica, sans-serif;
   font-size: 9px;
}
a:active {
   text-decoration: none;
   font-family: Verdana, Arial, Helvetica, sans-serif;
   font-size: 9px;
}

.prodName a:link {
   color: #FF6600;
   text-decoration: none;
   font-family: Verdana, Arial, Helvetica, sans-serif;
   font-size: 9px;
}
.prodName a:visited {
   color: #FF3300;
   text-decoration: none;
   font-family: Verdana, Arial, Helvetica, sans-serif;
   font-size: 9px;
}
.prodName a:hover {
   color: #CC0000;
   text-decoration: none;
   font-family: Verdana, Arial, Helvetica, sans-serif;
   font-size: 9px;
}
.prodName a:active {
   text-decoration: none;
   font-family: Verdana, Arial, Helvetica, sans-serif;
   font-size: 9px;
}
-->
</style>
<script language="JavaScript" type="text/JavaScript">
<!--
function MM_swapImgRestore() { //v3.0
  var i,x,a=document.MM_sr; for(i=0;a&&i<a.length&&(x=a[i])&&x.oSrc;i++) x.src=x.oSrc;
}

function MM_preloadImages() { //v3.0
  var d=document; if(d.images){ if(!d.MM_p) d.MM_p=new Array();
    var i,j=d.MM_p.length,a=MM_preloadImages.arguments; for(i=0; i<a.length; i++)
    if (a[i].indexOf("#")!=0){ d.MM_p[j]=new Image; d.MM_p[j++].src=a[i];}}
}

function MM_findObj(n, d) { //v4.01
  var p,i,x;  if(!d) d=document; if((p=n.indexOf("?"))>0&&parent.frames.length) {
    d=parent.frames[n.substring(p+1)].document; n=n.substring(0,p);}
  if(!(x=d[n])&&d.all) x=d.all[n]; for (i=0;!x&&i<d.forms.length;i++) x=d.forms[i][n];
  for(i=0;!x&&d.layers&&i<d.layers.length;i++) x=MM_findObj(n,d.layers[i].document);
  if(!x && d.getElementById) x=d.getElementById(n); return x;
}

function MM_swapImage() { //v3.0
  var i,j=0,x,a=MM_swapImage.arguments; document.MM_sr=new Array; for(i=0;i<(a.length-2);i+=3)
   if ((x=MM_findObj(a[i]))!=null){document.MM_sr[j++]=x; if(!x.oSrc) x.oSrc=x.src; x.src=a[i+2];}
}
//-->
</script>
</head>

<body onLoad="MM_preloadImages('images/But_TOC2.jpg','images/But_OD2.jpg')">
<form name="Form1" method="post" action="CompanyProfile_MG.aspx?cocode=80000004&amp;dirid=110&amp;coname=BIC+PRODUCT+SINGAPORE+PTE+LTD" id="Form1">
<div>
<input type="hidden" name="__EVENTTARGET" id="__EVENTTARGET" value="" />
<input type="hidden" name="__EVENTARGUMENT" id="__EVENTARGUMENT" value="" />
<input type="hidden" name="__VIEWSTATE" id="__VIEWSTATE" value="/wEPDwUKLTY1MTY0NTEwNg9kFgJmD2QWDmYPD2QPEBYCZgIBFgIWAh4OUGFyYW1ldGVyVmFsdWVkFgIfAGQWAgIDAgNkZAIDDzwrAAkAZAIEDzwrAAkBAA8WBB4IRGF0YUtleXMWAB4LXyFJdGVtQ291bnQCAWQWAmYPZBYCZg8VAhJDbGFzc2lmaWVkIFNlY3Rpb24KU1RBVElPTkVSU2QCBQ88KwAJAQAPFgQfARYAHwICAWQWAmYPZBYCZg8VAQpTVEFUSU9ORVJTZAIGDzwrAAkBAA8WBB8BFgAfAgIBZBYCZg9kFgJmDxUBClNUQVRJT05FUlNkAgcPPCsACQEADxYEHwEWAB8CAgFkFgJmD2QWBmYPFQEDMjIzZAIBDxUEEkNsYXNzaWZpZWQrU2VjdGlvbgM4NDkSQ2xhc3NpZmllZCBTZWN0aW9uAzIyM2QCAg8VAwQzNzAwClNUQVRJT05FUlMKU1RBVElPTkVSU2QCCA9kFgoCAQ8PFgIeB1Zpc2libGVoZGQCAw8PFgIfA2hkZAIFDw8WAh8DaGRkAgYPDxYCHghjaGVja2JveAUEV1VRMmQWAmYPDxYCHghJbWFnZVVybAVCLi4vaW1hZ2VzL0F1dGhlbnRpY2F0ZS8yMDEwMjIzY3djcHc0enhwemJiZm11dmFzdHpveWYxNjcyOTAyNjIucG5nZGQCCQ8PFgIfA2hkZBgBBR5fX0NvbnRyb2xzUmVxdWlyZVBvc3RCYWNrS2V5X18WAQURY29udGVudHMxJGJ0bkluZm/mRUA0blY9NgM1piLBW0rl9rUAeg==" />
</div>

<script type="text/javascript">
<!--
var theForm = document.forms['Form1'];
if (!theForm) {
    theForm = document.Form1;
}
function __doPostBack(eventTarget, eventArgument) {
    if (!theForm.onsubmit || (theForm.onsubmit() != false)) {
        theForm.__EVENTTARGET.value = eventTarget;
        theForm.__EVENTARGUMENT.value = eventArgument;
        theForm.submit();
    }
}
// -->
</script>


<script src="WebResource.axd@d=qOzOnzMFB1K_fNKp-QMhRQ2&t=633209583258281250" type="text/javascript"></script>


<script src="ScriptResource.axd@d=4yI66X5vEBH5-Ybc2a53skDG-98K0Q1-qxkWnBwTiErHUc_uICLv82l8Mj7CFCuaCkIN3GDjqwu83twrjWCuft_uOQRUbBKtLhR8E_cco9Q1&t=633214132339218750" type="text/javascript"></script>
<script src="ScriptResource.axd@d=4yI66X5vEBH5-Ybc2a53skDG-98K0Q1-qxkWnBwTiErHUc_uICLv82l8Mj7CFCuaCkIN3GDjqwu83twrjWCufiFRX8eGBsPvwdjRVBCfLPN5MyrrGz1g3Ux1HhQ5Ipwo0&t=633214132339218750" type="text/javascript"></script>
<script src="ScriptResource.axd@d=F_xKGNp0k3UabMl5uP81y-YbEJjia5seImjNRk161LXCFvYHovuzlbThUN6Ts39etqvumTbqnJ5z_OPZvrhnsg2&t=633470685644542518" type="text/javascript"></script>
<script src="ScriptResource.axd@d=F_xKGNp0k3UabMl5uP81y-YbEJjia5seImjNRk161LUKZx6bxOdNYMJxUWJVBfipcuvf49fFzGI3ZxFPdZ9UCw2&t=633470685644542518" type="text/javascript"></script>
<script src="ScriptResource.axd@d=F_xKGNp0k3UabMl5uP81y-YbEJjia5seImjNRk161LVDgtN4S8h6A8lAndG6gtP7PmFiotegSUwfj56bfgnBoQ2&t=633470685644542518" type="text/javascript"></script>
<script src="ScriptResource.axd@d=F_xKGNp0k3UabMl5uP81y-YbEJjia5seImjNRk161LVxKvlFmN3D1zBWLmMW-B5hgh8fkEJApN-IsotHF1sdKutmXE-rz9EaAbaZgiK4oCA1&t=633470685644542518" type="text/javascript"></script>
<script src="ScriptResource.axd@d=F_xKGNp0k3UabMl5uP81y-YbEJjia5seImjNRk161LVDgtN4S8h6A8lAndG6gtP7a61dyhAIXNOhOAo3IrcLs6qmkaSobZMihxY1qpiD4Y41&t=633470685644542518" type="text/javascript"></script>

 

     

<script type="text/aavascript">
var gaJsHost = (("https:" == document.location.protocol) ? "../https@ssl./" : "../www./");
document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
</script>
<script type="text/javascript">
try {
var pageTracker = _gat._getTracker("UA-4861434-24");
pageTracker._trackPageview();
} catch(err) {}</script>


<style type="text/css">
<!--
a:link {
   color: #FF6600;
   text-decoration: none;
   font-family: Verdana, Arial, Helvetica, sans-serif;
   font-size: 11px;
}
a:visited {
   color: #FF3300;
   text-decoration: none;
   font-family: Verdana, Arial, Helvetica, sans-serif;
   font-size: 11px;
}
a:hover {
   color: #CC0000;
   text-decoration: none;
   font-family: Verdana, Arial, Helvetica, sans-serif;
   font-size: 11px;
}
a:active {
   text-decoration: none;
   font-family: Verdana, Arial, Helvetica, sans-serif;
   font-size: 11px;
}
-->
</style>
<table width="97%"  border="0" align="left" cellpadding="0" cellspacing="0">
            <tr>
              <td width="29" valign="top"><a href="#" onMouseOut="MM_swapImgRestore()" onMouseOver="MM_swapImage('Image22','','images/But_TOC2.jpg',1)"></a><a href="#" onMouseOut="MM_swapImgRestore()" onMouseOver="MM_swapImage('Image23','','images/But_OD2.jpg',1)"></a>
                <table width="29" border="0" cellspacing="0" cellpadding="0">
                <!--start of contents button-->
                 
<tr>
    <td width="29" height="150">
    <!--<a href="#" onMouseOut="MM_swapImgRestore()" onMouseOver="MM_swapImage('Image22','','images/But_TOC2.jpg',1)"><img src="images/But_TOC1.jpg" name="Image22" width="29" height="150" border="0"></a>-->
        <script type="text/javascript">
//<![CDATA[
Sys.WebForms.PageRequestManager._initialize('contents1$ScriptManager1', document.getElementById('Form1'));
Sys.WebForms.PageRequestManager.getInstance()._updateControls([], [], [], 90);
//]]>
</script>

    <div class="demoarea">
   
        <p>
            <!-- Button used to launch the animation -->
            <input type="image" name="contents1$btnInfo" id="contents1_btnInfo" onMouseOver="this.src='Images/But_TOC2.jpg';" onMouseOut="this.src='Images/But_TOC1.jpg';" src="Images/But_TOC1.jpg" onClick="return false;" style="border-width:0px;" />
        </p>
       
        <!-- "Wire frame" div used to transition from the button to the info panel -->
        <div id="flyout" style="display: none; overflow: hidden; z-index: 2; background-color: #ffffff; border: solid 1px #D0D0D0; "></div>
       
        <!-- Info panel to be displayed as a flyout when the button is clicked -->
        <div id="info" style="display: none; width: 160px; z-index: 2; opacity: 0; filter: progid:DXImageTransform.Microsoft.Alpha(opacity=0); font-size: 12px; border: solid 1px #CCCCCC; background-color: #E8E8E8; padding: 0;" onmouseleave="CloseMenu();" >
            <div id="btnCloseParent" style="float: right; opacity: 0; filter: progid:DXImageTransform.Microsoft.Alpha(opacity=0); height:10px">
                <a onClick="return false;" id="contents1_btnClose" title="Close" href="companyprofile_mg.aspx?cocode=80000004&amp;dirid=110&amp;coname=bic+product+singapore+pte+ltd@__EVENTTARGET=contents1_2524btnClose&__EVENTARGUMENT=" style="background-color: #666666;  color: #FFFFFF; text-align: center; font-weight: bold; text-decoration: none; border: outset thin #FFFFFF; padding:0px 5px 0px 5px ">X</a>
            </div>
            <div id="menuitems">
                     <br />                     
                     <span class="contentsMenuItem" ><a href="IndexBM.aspx@DirID=110&version=English" class="link_txt_content">&nbsp;Main Page</a></span><hr noshade />
                     
                           <span class="contentsMenuItem" ><a href="IndustryNews.aspx@DirID=110" class="link_txt_content">&nbsp;News</a></span><hr noshade />
                  <span class='contentsMenuItem'><div align='left'>&nbsp;<a href='Classifications_MG.aspx@catid=1&dirid=110&classid=2&name=Corporate+Profile&mid=851' class='link_txt_content'>Corporate Profile</a></div></span><hr noshade /><span class='contentsMenuItem'><div align='left'>&nbsp;<a href='CompanyListings_MG.aspx@DirID=110&name=Company+Listings&mid=847' class='link_txt_content'>Company Listings</a></div></span><hr noshade /><span class='contentsMenuItem'><div align='left'>&nbsp;<a href='Products_Services_MG.aspx@DirID=110&name=Directory+Category+%26nbsp_253bListings&mid=848' class='link_txt_content'>Directory Category &nbsp;Listings</a></div></span><hr noshade /><span class='contentsMenuItem'><div align='left'>&nbsp;<a href='IndProf/BM/Government%20Development%20Assistance%20Programmes.pdf#&mid=845' class='link_txt_content' target=_blank >Government &nbsp;Development &nbsp;Assistance &nbsp;Programmes<br></a></div></span><hr noshade /><span class='contentsMenuItem'><div align='left'>&nbsp;<a href='../www.sgmeetings.com/#&name=Conventions+%26+%26nbsp%3bExhibitions&mid=846/default.htm' class='link_txt_content'>Conventions & &nbsp;Exhibitions</a></div></span><hr noshade /><span class='contentsMenuItem'><div align='left'>&nbsp;<a href='classifications_MG.aspx@catid=18176&DirID=110&name=Government+Agencies+%26nbsp_253b%26+Professional+Bodies_253cbr_253e&mid=850' class='link_txt_content'>Government Agencies &nbsp;& Professional Bodies<br></a></div></span><hr noshade /><span class='contentsMenuItem'><div align='left'>&nbsp;<a href='classifications_MG.aspx@catid=223&DirID=110&name=Classified+Section&mid=849' class='link_txt_content'>Classified Section</a></div></span><hr noshade /><span class='contentsMenuItem'><div align='left'>&nbsp;<a href='classifications_MG.aspx@catid=18182&DirID=110&classid=9999&name=Chambers+Of+Commerce&mid=852' class='link_txt_content'>Chambers Of Commerce</a></div></span><br></div>
        <script type="text/javascript" language="javascript">
            // Move an element directly on top of another element (and optionally
            // make it the same size)
            function Cover(bottom, top, ignoreSize) {
                var location = Sys.UI.DomElement.getLocation(bottom);
                top.style.position = 'absolute';
                top.style.top = location.y + 'px';
                top.style.left = location.x + 'px';
                if (!ignoreSize) {
                    top.style.height = bottom.offsetHeight + 'px';
                    top.style.width = bottom.offsetWidth + 'px';
                }
            }
            function CloseMenu()
            {
                var info =$get('info');
                info.style.display='none';
                info.style.width='160px';
                info.style.fontSize='12px';
           
            }
        </script>
       
       
       
       
    </div>

    </td>
</tr>
                  <!--end of contents button-->
                 
                </table></td>
              <td width="751" valign="top" class="copyright">
              <table width="98%"  border="0"  cellpadding="0" cellspacing="0">
                <tr>
                  <td height="5" colspan="2"></td>
                </tr>
               
                <tr>
                  <td><div align="left">
                    <table width="100%"  border="0" align="center" cellpadding="0" cellspacing="0">
                      <tr>
                        <td width="6" height="21"><img src="images/TitleBar_L.jpg" width="6" height="21"></td>
                        <td background="images/TitleBar_Ctr.jpg" class="BarTitleMain">BIC PRODUCT (SINGAPORE) PTE LTD </td>
                        <td width="6" height="21"><img src="images/TitleBar_R.jpg" width="6" height="21"></td>
                      </tr>
                    </table>
                  </div></td>
                  </tr>           
               
                <tr valign="top">
                  <td height="5" colspan="2" class="copyright"></td>
                </tr>
               
            <!-- start of display for free listing-->
                <tr>
                  <td><div align="left">
                    <table width="710"  border="0" align="center" cellpadding="0" cellspacing="0">
                      <tr>
                        <td bgcolor="#DCD9D9">&nbsp;</td>
                      </tr>
                      <tr>
                        <td bgcolor="#DCD9D9"><table width="98%"  border="0" align="center" cellpadding="0" cellspacing="0" bgcolor="#FFFFFF">
                         
                            <tr>
                              <td height="25" class="bodytextBold">Address:</td>
                              <td height="25" class="bodytxt" >  <div align="left" class="BodyTxt2" >       
                             
                         
                                 60 Alexandra Terrace
                        #05-05 The Comtech<br>
                        Singapore 118502
                     
                            
                         
                          </div>
                              </td>
                              </tr>
                            <tr>
                              <td height="1" colspan="2" bgcolor="#CCCCCC"></td>
                              </tr>
                           
                            <tr>
                              <td width="16%" height="25" class="bodytextBold">Telephone:</td>
                              <td height="25" class="bodytxt" >
                                <div align="left" class="BodyTxt2" >                                                                 
                           
                                       (65) 6227 3066
                                </div>
                              </td>
                              </tr>
                            <tr bgcolor="#CCCCCC">
                              <td height="1" colspan="2" class="bodytextBold"></td>
                            </tr>
                           
                            <tr>
                              <td width="16%" height="25" class="bodytextBold">Fax:</td>
                              <td height="25" class="bodytxt" >
                                <div align="left" class="BodyTxt2" >                                                                 
                           
                                       (65) 6227 4150
                                </div>
                              </td>
                              </tr>
                            <tr bgcolor="#CCCCCC">
                              <td height="1" colspan="2" class="bodytextBold"></td>
                            </tr>
                           
                            <tr>
                              <td width="16%" height="25" class="bodytextBold">Email:</td>
                              <td height="25" class="bodytxt" >
                                <div align="left" class="BodyTxt2" >                                                                 
                           
                                       <script language=javascript>var username = 'kucy.ng';var hostname ='bicworld.com';var linktext = username + '@' + hostname;document.write(username +'@' + hostname )</script>
                                </div>
                              </td>
                              </tr>
                            <tr bgcolor="#CCCCCC">
                              <td height="1" colspan="2" class="bodytextBold"></td>
                            </tr>
                           
                            <tr>
                              <td width="16%" height="25" class="bodytextBold" valign=top>Nature of Business:</td>
                              <td height="25" class="bodytxt" >
                                <div align="left" class="BodyTxt2" >                                                                 
                           
                                       BIC has built its success on a clear vision: to make top-quality, affordable
BIC® products available to everyone.
                                </div>
                              </td>
                            </tr>
              <tr>
                              <td height="1" colspan="2" class="bodytextBold">&nbsp;</td>
                            </tr>
                            <tr bgcolor="#CCCCCC">
                              <td height="1" colspan="2" class="bodytextBold"></td>
                            </tr>
                                             
                            <tr>
                              <td height="25" class="bodytextBold">Categories:<br></td>
                              <td height="25">
                                <div align="left" class="bodytxt">
                                 <table id="dtlCat3" cellspacing="0" border="0" style="height:0px;border-collapse:collapse;">
   <tr>
      <td style="height:0px;">
                              <FONT class="BodyTxt2" style="HEIGHT: 15px">Classified Section -> STATIONERS
                                 </FONT>
                           </td>
   </tr>
</table>
                                 </div></td>
                              </tr>
                            <tr bgcolor="#CCCCCC">
                              <td height="1" colspan="2" class="bodytextBold"></td>
                            </tr>
                           
                            <tr bgcolor="#CCCCCC">
                              <td height="1" colspan="2" class="bodytextBold"></td>
                            </tr>
                        </table></td>
                      </tr>
                      <tr>
                        <td><img src="images/CoProfile_emailBg.gif" width="710" height="6"></td>
                      </tr>
                    </table>
                    </div>                    </td>
                  </tr>
                <tr>
                  <td height="10">&nbsp;</td>
                  </tr>
            <!-- end of display for free listing-->                              
               
                <!--Add Email here-->
                <td colspan="2"></td>
                </tr>
              </table></td>
            </tr>
           
          </table>         
          <p>&nbsp;</p>
          <p>&nbsp;</p>
          <p>&nbsp;</p>
          <p>&nbsp;</p>
          <p>&nbsp;</p>
          <p>&nbsp;</p>
          <p>&nbsp;</p>
          <p>&nbsp;</p></td>
      </tr>
    </table></td>
    <td background="images/bg3.jpg"style="{background-repeat:no-repeat}">&nbsp;</td>
  </tr>
</table>
<map name="Map">
  <area shape="rect" coords="9,3,140,14" href="#">
  <area shape="rect" coords="12,14,209,27" href="#">
   
</map>

<div>

   <input type="hidden" name="__EVENTVALIDATION" id="__EVENTVALIDATION" value="/wEWAwK71NeTCAKLqo6tCALN/IGYDmDkTmpoYPtkhG2SIX3sM4UBwbIa" />
</div>

<script type="text/javascript">
<!--
Sys.Application.initialize();
Sys.Application.add_init(function() {
    $create(AjaxControlToolkit.Animation.AnimationBehavior, {"OnMouseOver":"{\"AnimationName\":\"Sequence\",\"AnimationChildren\":[{\"AnimationName\":\"ScriptAction\",\"Script\":\"Cover($get(\\u0027contents1_btnInfo\\u0027), $get(\\u0027flyout\\u0027));\",\"AnimationChildren\":[]},{\"AnimationName\":\"StyleAction\",\"AnimationTarget\":\"flyout\",\"Attribute\":\"display\",\"Value\":\"block\",\"AnimationChildren\":[]},{\"AnimationName\":\"Parallel\",\"AnimationTarget\":\"flyout\",\"Duration\":\".5\",\"Fps\":\"25\",\"AnimationChildren\":[{\"AnimationName\":\"Move\",\"Horizontal\":\"28\",\"Vertical\":\"0\",\"AnimationChildren\":[]},{\"AnimationName\":\"Resize\",\"Width\":\"160\",\"AnimationChildren\":[]},{\"AnimationName\":\"Color\",\"PropertyKey\":\"backgroundColor\",\"StartValue\":\"#FF0000\",\"EndValue\":\"#FFFFFF\",\"AnimationChildren\":[]}]},{\"AnimationName\":\"ScriptAction\",\"Script\":\"Cover($get(\\u0027flyout\\u0027), $get(\\u0027info\\u0027), true);\",\"AnimationChildren\":[]},{\"AnimationName\":\"StyleAction\",\"AnimationTarget\":\"info\",\"Attribute\":\"display\",\"Value\":\"block\",\"AnimationChildren\":[]},{\"AnimationName\":\"FadeIn\",\"AnimationTarget\":\"info\",\"Duration\":\".5\",\"AnimationChildren\":[]},{\"AnimationName\":\"StyleAction\",\"AnimationTarget\":\"flyout\",\"Attribute\":\"display\",\"Value\":\"none\",\"AnimationChildren\":[]},{\"AnimationName\":\"Parallel\",\"AnimationTarget\":\"info\",\"Duration\":\".5\",\"AnimationChildren\":[{\"AnimationName\":\"FadeIn\",\"AnimationTarget\":\"btnCloseParent\",\"MaximumOpacity\":\".9\",\"AnimationChildren\":[]},{\"AnimationName\":\"Color\",\"PropertyKey\":\"borderColor\",\"StartValue\":\"#ff0000\",\"EndValue\":\"#666666\",\"AnimationChildren\":[]},{\"AnimationName\":\"Color\",\"PropertyKey\":\"color\",\"StartValue\":\"#666666\",\"EndValue\":\"#FD5A41\",\"AnimationChildren\":[]}]}]}","id":"contents1_OpenAnimation"}, null, null, $get("contents1_btnInfo"));
});
Sys.Application.add_init(function() {
    $create(AjaxControlToolkit.Animation.AnimationBehavior, {"OnClick":"{\"AnimationName\":\"Sequence\",\"AnimationTarget\":\"info\",\"AnimationChildren\":[{\"AnimationName\":\"StyleAction\",\"Attribute\":\"overflow\",\"Value\":\"hidden\",\"AnimationChildren\":[]},{\"AnimationName\":\"Parallel\",\"Duration\":\".8\",\"Fps\":\"15\",\"AnimationChildren\":[{\"AnimationName\":\"Scale\",\"ScaleFactor\":\"0.08\",\"Center\":\"false\",\"ScaleFont\":\"true\",\"FontUnit\":\"px\",\"AnimationChildren\":[]},{\"AnimationName\":\"FadeOut\",\"AnimationChildren\":[]}]},{\"AnimationName\":\"StyleAction\",\"Attribute\":\"display\",\"Value\":\"none\",\"AnimationChildren\":[]},{\"AnimationName\":\"StyleAction\",\"Attribute\":\"width\",\"Value\":\"160px\",\"AnimationChildren\":[]},{\"AnimationName\":\"StyleAction\",\"Attribute\":\"height\",\"Value\":\"\",\"AnimationChildren\":[]},{\"AnimationName\":\"StyleAction\",\"Attribute\":\"fontSize\",\"Value\":\"12px\",\"AnimationChildren\":[]},{\"AnimationName\":\"OpacityAction\",\"AnimationTarget\":\"btnCloseParent\",\"Opacity\":\"0\",\"AnimationChildren\":[]}]}","OnMouseOut":"{\"AnimationName\":\"Color\",\"Duration\":\".2\",\"PropertyKey\":\"color\",\"StartValue\":\"#FF0000\",\"EndValue\":\"#FFFFFF\",\"AnimationChildren\":[]}","OnMouseOver":"{\"AnimationName\":\"Color\",\"Duration\":\".2\",\"PropertyKey\":\"color\",\"StartValue\":\"#FFFFFF\",\"EndValue\":\"#FF0000\",\"AnimationChildren\":[]}","id":"contents1_CloseAnimation"}, null, null, $get("contents1_btnClose"));
});
// -->
</script>
</form>
</body>
</html>


How do I tell TextPipe that all I am looking for is just the following data in a nested table:

Code: Select all

<table width="98%"  border="0"  cellpadding="0" cellspacing="0">
                <tr>
                  <td height="5" colspan="2"></td>
                </tr>
               
                <tr>
                  <td><div align="left">
                    <table width="100%"  border="0" align="center" cellpadding="0" cellspacing="0">
                      <tr>
                        <td width="6" height="21"><img src="images/TitleBar_L.jpg" width="6" height="21"></td>
                        <td background="images/TitleBar_Ctr.jpg" class="BarTitleMain">BIC PRODUCT (SINGAPORE) PTE LTD </td>
                        <td width="6" height="21"><img src="images/TitleBar_R.jpg" width="6" height="21"></td>
                      </tr>
                    </table>
                  </div></td>
                  </tr>           
               
                <tr valign="top">
                  <td height="5" colspan="2" class="copyright"></td>
                </tr>
               
            <!-- start of display for free listing-->
                <tr>
                  <td><div align="left">
                    <table width="710"  border="0" align="center" cellpadding="0" cellspacing="0">
                      <tr>
                        <td bgcolor="#DCD9D9">&nbsp;</td>
                      </tr>
                      <tr>
                        <td bgcolor="#DCD9D9"><table width="98%"  border="0" align="center" cellpadding="0" cellspacing="0" bgcolor="#FFFFFF">
                         
                            <tr>
                              <td height="25" class="bodytextBold">Address:</td>
                              <td height="25" class="bodytxt" >  <div align="left" class="BodyTxt2" >       
                             
                         
                                 60 Alexandra Terrace
                        #05-05 The Comtech<br>
                        Singapore 118502
                     
                            
                         
                          </div>
                              </td>
                              </tr>
                            <tr>
                              <td height="1" colspan="2" bgcolor="#CCCCCC"></td>
                              </tr>
                           
                            <tr>
                              <td width="16%" height="25" class="bodytextBold">Telephone:</td>
                              <td height="25" class="bodytxt" >
                                <div align="left" class="BodyTxt2" >                                                                 
                           
                                       (65) 6227 3066
                                </div>
                              </td>
                              </tr>
                            <tr bgcolor="#CCCCCC">
                              <td height="1" colspan="2" class="bodytextBold"></td>
                            </tr>
                           
                            <tr>
                              <td width="16%" height="25" class="bodytextBold">Fax:</td>
                              <td height="25" class="bodytxt" >
                                <div align="left" class="BodyTxt2" >                                                                 
                           
                                       (65) 6227 4150
                                </div>
                              </td>
                              </tr>
                            <tr bgcolor="#CCCCCC">
                              <td height="1" colspan="2" class="bodytextBold"></td>
                            </tr>
                           
                            <tr>
                              <td width="16%" height="25" class="bodytextBold">Email:</td>
                              <td height="25" class="bodytxt" >
                                <div align="left" class="BodyTxt2" >                                                                 
                           
                                       <script language=javascript>var username = 'kucy.ng';var hostname ='bicworld.com';var linktext = username + '@' + hostname;document.write(username +'@' + hostname )</script>
                                </div>
                              </td>
                              </tr>
                            <tr bgcolor="#CCCCCC">
                              <td height="1" colspan="2" class="bodytextBold"></td>
                            </tr>
                           
                            <tr>
                              <td width="16%" height="25" class="bodytextBold" valign=top>Nature of Business:</td>
                              <td height="25" class="bodytxt" >
                                <div align="left" class="BodyTxt2" >                                                                 
                           
                                       BIC has built its success on a clear vision: to make top-quality, affordable
BIC® products available to everyone.
                                </div>
                              </td>
                            </tr>
              <tr>
                              <td height="1" colspan="2" class="bodytextBold">&nbsp;</td>
                            </tr>
                            <tr bgcolor="#CCCCCC">
                              <td height="1" colspan="2" class="bodytextBold"></td>
                            </tr>
                                             
                            <tr>
                              <td height="25" class="bodytextBold">Categories:<br></td>
                              <td height="25">
                                <div align="left" class="bodytxt">
                                 <table id="dtlCat3" cellspacing="0" border="0" style="height:0px;border-collapse:collapse;">
   <tr>
      <td style="height:0px;">
                              <FONT class="BodyTxt2" style="HEIGHT: 15px">Classified Section -> STATIONERS
                                 </FONT>
                           </td>
   </tr>
</table>
                                 </div></td>
                              </tr>
                            <tr bgcolor="#CCCCCC">
                              <td height="1" colspan="2" class="bodytextBold"></td>
                            </tr>
                           
                            <tr bgcolor="#CCCCCC">
                              <td height="1" colspan="2" class="bodytextBold"></td>
                            </tr>
                        </table></td>
                      </tr>
                      <tr>
                        <td><img src="images/CoProfile_emailBg.gif" width="710" height="6"></td>
                      </tr>
                    </table>
                    </div>                    </td>
                  </tr>
                <tr>
                  <td height="10">&nbsp;</td>
                  </tr>
            <!-- end of display for free listing-->                              
               
                <!--Add Email here-->
                <td colspan="2"></td>
                </tr>
              </table>

Ultimately all i want is this.

Code: Select all

Company Name Column :BIC PRODUCT (SINGAPORE) PTE LTD
Telephone Column: (65) 6227 3066
Fax Column: (65) 6227 4150
Email Column:
Nature of Business Column: BIC has built its success on a clear vision: to make top-quality, affordable BIC® products available to everyone.
Category Column: Classified Section -> STATIONERS

Is it possible?
Rgrds.

User avatar
DataMystic Support
Site Admin
Posts: 2136
Joined: Mon Jun 30, 2003 12:32 pm
Location: Melbourne, Australia
Contact:

Re: Extracting Specific Details

Postby DataMystic Support » Wed Feb 24, 2010 5:52 am

Using an EasyPattern match, just replace each variable section (the data you need), with

Code: Select all

[capture(0+chars)]


Then in the replace text, specify

Code: Select all

$1,$2,$3,$4....\r\n

up to the number of fields you have. Turn the Extract Matches option on.
Regards,

Simon Carter, http://DataMystic.com/forums/index.php
http://PredictBGL.com - Insulin dose calculator for Type 1 diabetes
http://DownloadPipe.com - 250,000 free software downloads
http://DetachPipe.com - send huge email attachments

xyber
Posts: 17
Joined: Sat Feb 20, 2010 5:34 am

Re: Extracting Specific Details

Postby xyber » Wed Feb 24, 2010 5:50 pm

Sorry, I don't quite get it. Where do I add the EasyPattern and the replace text? Do I add them in the TextPipe console? If so, how do I mark the places where I want TextPipe to extract data?

Rgrds.

User avatar
DataMystic Support
Site Admin
Posts: 2136
Joined: Mon Jun 30, 2003 12:32 pm
Location: Melbourne, Australia
Contact:

Re: Extracting Specific Details

Postby DataMystic Support » Wed Feb 24, 2010 7:12 pm

Add a Filters\Replace\Find EasyPattern.
Regards,

Simon Carter, http://DataMystic.com/forums/index.php
http://PredictBGL.com - Insulin dose calculator for Type 1 diabetes
http://DownloadPipe.com - 250,000 free software downloads
http://DetachPipe.com - send huge email attachments

xyber
Posts: 17
Joined: Sat Feb 20, 2010 5:34 am

Re: Extracting Specific Details

Postby xyber » Thu Feb 25, 2010 5:16 am

Hi,

I've tried experimenting with various filters and have ultimately been able to cut out all the unnecessary data. My next question is: how do I export all the data to an Excel file, arranged in rows and columns? Thank you for your patience.

Rgrds.

User avatar
DataMystic Support
Site Admin
Posts: 2136
Joined: Mon Jun 30, 2003 12:32 pm
Location: Melbourne, Australia
Contact:

Re: Extracting Specific Details

Postby DataMystic Support » Thu Feb 25, 2010 7:35 am

Right click the Replace field, and choose Insert Matched Subexpression\CSV Fields. Add a line feed as well.

Then set the output filter to add a .csv extension - then it will open in Excel.
Regards,

Simon Carter, http://DataMystic.com/forums/index.php
http://PredictBGL.com - Insulin dose calculator for Type 1 diabetes
http://DownloadPipe.com - 250,000 free software downloads
http://DetachPipe.com - send huge email attachments

xyber
Posts: 17
Joined: Sat Feb 20, 2010 5:34 am

Re: Extracting Specific Details

Postby xyber » Fri Feb 26, 2010 3:20 am

Hi,

Which replace field should i select? Were you referring to the list of available filters? Also was the line feed an option in there? Or should i add something specific for the line feed?

Thank you very much.

Rgrds.

User avatar
DataMystic Support
Site Admin
Posts: 2136
Joined: Mon Jun 30, 2003 12:32 pm
Location: Melbourne, Australia
Contact:

Re: Extracting Specific Details

Postby DataMystic Support » Mon Mar 01, 2010 5:48 am

Use Filters\Replace\Find EasyPattern

In the replace field, you can enter a line feed by pressing return, or by typing

Code: Select all

\r\n
Regards,

Simon Carter, http://DataMystic.com/forums/index.php
http://PredictBGL.com - Insulin dose calculator for Type 1 diabetes
http://DownloadPipe.com - 250,000 free software downloads
http://DetachPipe.com - send huge email attachments

xyber
Posts: 17
Joined: Sat Feb 20, 2010 5:34 am

Re: Extracting Specific Details

Postby xyber » Mon Mar 01, 2010 3:47 pm

Hi,

I now have these results after weeding out the unnecessary data.

Code: Select all

Organization Name: ASIAPAC DISTRIBUTION PTE LTD    
RCB Number:    198701200H    
Address:    219 Henderson Road #05-03 Henderson Industrial Park Singapore 159556    
Telephone:    (65) 6270 8281    
Fax:    (65) 6278 3104    
Email:    sales@asiapac.com.sg    
Website:    http://www.asiapac.com.sg    
Contact:    Mr Lee Hock Seng Managing Director    
Nature of Business:    Established IT distribution house in Asia-Pacific region.    
Categories:Classified Section -> COMPUTERSSponsored Link:   
INOVUUS TECHNOLOGIES PTE LTD    

How do I turn this into CSV format?
That is, all the labels (Organization Name, Address, etc.) go on the first row, and their corresponding information fills the next row?

Thank you very much.

Rgrds.

User avatar
DataMystic Support
Site Admin
Posts: 2136
Joined: Mon Jun 30, 2003 12:32 pm
Location: Melbourne, Australia
Contact:

Re: Extracting Specific Details

Postby DataMystic Support » Mon Mar 01, 2010 4:39 pm

Code: Select all

Organization Name:[ longest 0+ spaces, capture(1+ not cr or lf) ]
RCB Number:[ longest 0+ spaces, capture(1+ not cr or lf) ]
Address:[ longest 0+ spaces, capture(1+ not cr or lf) ]
Telephone:[ longest 0+ spaces, capture(1+ not cr or lf) ]
Fax:[ longest 0+ spaces, capture(1+ not cr or lf) ]
Email:[ longest 0+ spaces, capture(1+ not cr or lf) ]
Website:[ longest 0+ spaces, capture(1+ not cr or lf) ]
Contact:[ longest 0+ spaces, capture(1+ not cr or lf) ]
Nature of Business:[ longest 0+ spaces, capture(1+ not cr or lf) ]
Categories:[ longest 0+ spaces, capture(1+ not cr or lf) ]  Sponsored Link:[ longest 0+ spaces, capture(0+ not cr or lf) ]


Replace with
$1,$2,$3,$4 etc
Regards,

Simon Carter, http://DataMystic.com/forums/index.php
http://PredictBGL.com - Insulin dose calculator for Type 1 diabetes
http://DownloadPipe.com - 250,000 free software downloads
http://DetachPipe.com - send huge email attachments

xyber
Posts: 17
Joined: Sat Feb 20, 2010 5:34 am

Re: Extracting Specific Details

Postby xyber » Mon Mar 01, 2010 5:25 pm

Hi,

I followed your advice but i still cannot get the output in two rows for my csv. Am i doing something wrong? I've included the filter file on the following URL. Please do let me know if i am doing it wrong. Thank you.

http://www.mediafire.com/file/zmjudgjjovk/newfilter.fll
What i need right now is to get the right hand data to be the header and the left to be below it.

Rgrds.

User avatar
DataMystic Support
Site Admin
Posts: 2136
Joined: Mon Jun 30, 2003 12:32 pm
Location: Melbourne, Australia
Contact:

Re: Extracting Specific Details

Postby DataMystic Support » Tue Mar 02, 2010 7:28 am

Not sure why you would want the data on the first line and the header below it, but if so, use the following replace string:

Code: Select all

$1,$2,$3,$4,$5,$6,$7,$8,$9,$10
Organization Name,RCB Number,Address,Telephone,Fax,Email,Website,Contact,Nature of Business,Categories
Regards,

Simon Carter, http://DataMystic.com/forums/index.php
http://PredictBGL.com - Insulin dose calculator for Type 1 diabetes
http://DownloadPipe.com - 250,000 free software downloads
http://DetachPipe.com - send huge email attachments

xyber
Posts: 17
Joined: Sat Feb 20, 2010 5:34 am

Re: Extracting Specific Details

Postby xyber » Tue Mar 02, 2010 10:21 am

Hi,

What I mean to say is this. A preview of the content that has already been formatted in a '.txt' file can be seen in dottxt.JPG.
dottxt.JPG
This is how each file has been formatted
dottxt.JPG (13.59 KiB) Viewed 6678 times


This is a preview of how it looks when imported into Excel as a CSV.
individualcsv.JPG
This is how it looks like when i import in the current formatted data
individualcsv.JPG (15.88 KiB) Viewed 6678 times


This is what I want it to look like. As there are many files, I want them all tabulated like this.
resultwanted.JPG
This is how i want it to be ultimately
resultwanted.JPG (18.99 KiB) Viewed 6678 times


This is why I figured that if the format can be changed to two lines, it will be easier to import into Excel, as the headers are already set and all I need to do is sort and remove duplicates. Or is there a shorter way? Thank you very much.

Rgrds.


Return to “TextPipe Tips and Tricks, Questions and Support”

Who is online

Users browsing this forum: No registered users and 1 guest