lxml.html basic parse methods for [url, file, string]

lxml.html examples of parsing from

  • URLs
  • Files
  • Strings

URLs

import lxml.html
htmltree = lxml.html.parse('http://joecodeswell.com')

htmltree.xpath("//title")[0].text

'''
OUTPUT:
'JoeCodeswell.com'
'''

Files

N.B. Save ‘http://joecodeswell.com’ as a file named ‘JoeCodeswell.com.htm’.
Make sure to cd to the dir containing the file before running the following.

import lxml.html
htmltree = lxml.html.parse('JoeCodeswell.com.htm')

htmltree.xpath("//title")[0].text

'''
OUTPUT:
'JoeCodeswell.com'
'''

Strings

N.B. Save ‘http://joecodeswell.com’ as a file named ‘JoeCodeswell.com.htm’.
Make sure to cd to the dir containing the file before running the following.

import lxml.html

f = open('JoeCodeswell.com.htm', 'r'); the_string = f.read(); f.close()
htmltree = lxml.html.fromstring(the_string)

htmltree.xpath("//title")[0].text

'''
OUTPUT:
'JoeCodeswell.com'
'''

More lxml Syntax Examples

More lxml Syntax Examples

Continued from lxml HTML Scraping Syntax Examples

Content:

  • Python Code
  • Resulting Output

Python Code

#!/usr/local/bin/python2.7
# -*- coding: UTF-8 -*-
"""lxmlScrapingExamplesMore.py takes INURL [URL to an html file] Producing OUTFILEPATH [a scrapped text file]
Usage:   lxmlScrapingExamplesMore.py INURL                                                  OUTFILEPATH
Example: lxmlScrapingExamplesMore.py http://joecodeswell.org/examples/dlwebfiles/acl_attach.htm lxmlScrapingOutput.txt
"""
import sys,os

# joe professional opinion: package structure a bit goofy!   :)
import lxml, lxml.html


def lxmlScrapingExamples(myinurl, myoutfilepath):
    myinurl = 'http://joecodeswell.org/examples/dlwebfiles/acl_attach.htm' # what gets called
    print myinurl
    print myoutfilepath

    #Example 1 redo for myinurl new value 
    print "\n\nExample 1 - basic parsing of url"
    htmltree = lxml.html.parse(myinurl)
    # print "lxml.etree.tostring(htmltree, pretty_print=True) = %s"%(lxml.etree.tostring(htmltree, pretty_print=True))

    #Example 5 - xpath tag with class=value     N.B. backslashes for newLines, etc., DISAPPEAR in WordPress Marldown
    # see http://lxml.de/xpathxslt.html
    print "\n\nExample 5 - xpath tag with class=value"
    print """htmltree.xpath("//h1[@class='title topictitle1']")[0].text = %s"""%(htmltree.xpath("//h1[@class='title topictitle1']")[0].text)
    print """htmltree.xpath("//p[@class='shortdesc']")[0].text = %s"""%(htmltree.xpath("//p[@class='shortdesc']")[0].text)     
    print """len(htmltree.xpath("//var[@class='keyword varname']")) = %s"""%(len(htmltree.xpath("//var[@class='keyword varname']")))
    print """htmltree.xpath("//var[@class='keyword varname']")[0].text = %s"""%(htmltree.xpath("//var[@class='keyword varname']")[0].text)

    #Example 6 - parent   and   ElementVariables with   //  VS  .//   
    print "\n\nExample 6 - parent and ElementVariables"
    print """syntax_div = htmltree.xpath("//h2[@class='title sectiontitle']")[0].getparent() = %s"""%(htmltree.xpath("//h2[@class='title sectiontitle']")[0].getparent())
    syntax_div = htmltree.xpath("//h2[@class='title sectiontitle']")[0].getparent()
    print """syntax_div = %s"""%(syntax_div)
    print syntax_div_2string,'\n'
    print """syntax_div = %s"""%(syntax_div)  

    print "\n\nsyntax_div.xpath     //   VS  .//  \n\n"   
    print "// uses  htmltree"
    print """    syntax_div.xpath("count(//samp)") = %s"""%(syntax_div.xpath("count(//samp)"))  
    print '            equals\n'
    print """    htmltree.xpath("count(//samp)") = %s"""%(htmltree.xpath("count(//samp)"))  
    print """    syntax_div.xpath("count(//var)") = %s"""%(syntax_div.xpath("count(//var)"))  
    print '            equals'
    print """    htmltree.xpath("count(//var)") = %s"""%(htmltree.xpath("count(//var)"))  
    print '\nVS   .// uses  syntax_div ONLY'
    print """    htmltree.xpath("count(.//samp)") = %s"""%(htmltree.xpath("count(.//samp)"))  
    print """    syntax_div.xpath("count(.//var)") = %s"""%(syntax_div.xpath("count(.//var)")) 
    print "\n"    
    print syntax_div_ipython_discovery

    #Example 7 - xpath select element by text
    print "\n\nExample 7 - xpath select element by text"
    print """description_div = htmltree.xpath("//h2[@class='title sectiontitle' and text()='Description']")[0].getparent() = %s"""%(htmltree.xpath("//h2[@class='title sectiontitle' and text()='Description']")[0].getparent())
    description_div = htmltree.xpath("//h2[@class='title sectiontitle' and text()='Description']")[0].getparent()
    print """description_div = %s"""%(description_div)
    print description_div_2string,'\n'
    print """description_div.xpath("./p")[0].text = %s"""%(description_div.xpath("./p")[0].text)


    #Example 8 - get all text in an element
    print "\n\nExample 8 - get all text in element\nsee http://lxml.de/lxmlhtml.html#html-element-methods"
    print """xample_div = htmltree.xpath("//h2[@class='title sectiontitle' and text()='Example']")[0].getparent() = %s"""%(htmltree.xpath("//h2[@class='title sectiontitle' and text()='Example']")[0].getparent())    
    example_div = htmltree.xpath("//h2[@class='title sectiontitle' and text()='Example']")[0].getparent()
    print example_div_2string,'\n'
    print "example_div.text_content() = %s"%(example_div.text_content())


    #Example 9 - zip/dict   data terms & data definitions
    print "\n\nExample 9 - zipping data terms & data definitions"
    print """options_div = htmltree.xpath("//h2[@class='title sectiontitle' and text()='Options']")[0].getparent() = %s"""%(htmltree.xpath("//h2[@class='title sectiontitle' and text()='Options']")[0].getparent())    
    options_div = htmltree.xpath("//h2[@class='title sectiontitle' and text()='Options']")[0].getparent()
    print options_div_2string
    terms = [t.text for t in options_div.xpath("dl/dt/samp/var")]
    defs  = [d.text for d in options_div.xpath("dl/dd")]
    term_def_dict = dict(zip(terms, defs))
    print '\nterm_def_dict'
    for k, v in term_def_dict.iteritems():
        print '    %s: %s'%(k,v)


#print lxml.etree.tostring(options_div, pretty_print=True)

options_div_2string = """<div class="section">
  <h2 class="title sectiontitle">Options</h2>
  <dl class="dl">
    <dt class="dt dlterm">
      <samp class="ph codeph">
        <var class="keyword varname">acl_name</var>
      </samp>
    </dt>
    <dd class="dd">Specifies the ACL policy that is applied to the named object.
      The ACL policy must exist, or an error is displayed. 
      <p class="p">Examples of
      the ACL names are 
        <samp class="ph codeph">default-root</samp>, 
        <samp class="ph codeph">test</samp>, 
        <samp class="ph codeph">default-management</samp>,
        and 
        <samp class="ph codeph">pubs_acl3</samp>.</p>
    </dd>
    <dt class="dt dlterm">
      <samp class="ph codeph">
        <var class="keyword varname">object_name</var>
      </samp>
    </dt>
    <dd class="dd">Specifies the object to which to apply the named ACL policy. The
    object name must exist, or an error is displayed. 
      <p class="p">Examples of object
      names are:
      </p>
      <ul class="ul">
        <li class="li">
          <samp class="ph codeph">/Management/Groups/Travel</samp>
        </li>
        <li class="li">
          <samp class="ph codeph">/WebSEAL</samp>
        </li>
        <li class="li">
          <samp class="ph codeph">/Management</samp>
        </li>
      </ul>
    </dd>
  </dl>
</div>"""



example_div_2string = """<div class="example">
  <h2 class="title sectiontitle">Example</h2>
  <div class="p">The following example attaches the ACL policy, 
    <samp class="ph codeph">pubs_acl3</samp>, 
    to the protected object, 
    <samp class="ph codeph">/Management</samp>: 
    <pre class="pre codeblock">
      <code>pdadmin sec_master> acl attach /Management pubs_acl3</code>
    </pre>
  </div>
</div>
"""    

description_div_2string = """<div class="section">
  <h2 class="title sectiontitle">Syntax</h2>
  <p class="p">
    <span class="keyword cmdname">acl attach</span>
    <samp class="ph codeph">
      <var class="keyword varname">object_name</var></samp> 
    <samp class="ph codeph">
      <var class="keyword varname">acl_name</var>
    </samp>
  </p>
 </div>"""

syntax_div_2string = """<div class="section">
  <h2 class="title sectiontitle">Syntax</h2>
  <p class="p">
    <span class="keyword cmdname">acl attach</span> 
    <samp class="ph codeph">
      <var class="keyword varname">object_name</var>
    </samp> 
    <samp class="ph codeph">
      <var class="keyword varname">acl_name</var>
    </samp>
  </p>
 </div>"""

syntax_div_ipython_discovery = """In [54]: syntax_div.
syntax_div.addnext             syntax_div.get_element_by_id   syntax_div.keys
syntax_div.addprevious         syntax_div.getchildren         syntax_div.label
syntax_div.append              syntax_div.getiterator         syntax_div.make_links_absolut
syntax_div.attrib              syntax_div.getnext             syntax_div.makeelement
syntax_div.base                syntax_div.getparent           syntax_div.nsmap
syntax_div.base_url            syntax_div.getprevious         syntax_div.prefix
syntax_div.body                syntax_div.getroottree         syntax_div.remove
syntax_div.clear               syntax_div.head                syntax_div.replace
syntax_div.cssselect           syntax_div.index               syntax_div.resolve_base_href
syntax_div.drop_tag            syntax_div.insert              syntax_div.rewrite_links
syntax_div.drop_tree           syntax_div.items               syntax_div.set
syntax_div.extend              syntax_div.iter                syntax_div.sourceline
syntax_div.find                syntax_div.iterancestors       syntax_div.tag
syntax_div.find_class          syntax_div.iterchildren        syntax_div.tail
syntax_div.find_rel_links      syntax_div.iterdescendants     syntax_div.text
syntax_div.findall             syntax_div.iterfind            syntax_div.text_content
syntax_div.findtext            syntax_div.iterlinks           syntax_div.values
syntax_div.forms               syntax_div.itersiblings        syntax_div.xpath
syntax_div.get                 syntax_div.itertext
"""

NUM_ARGS = 2
def main():
    args = sys.argv[1:]
    if len(args) != NUM_ARGS or "-h" in args or "--help" in args:
        print __doc__
        s = raw_input('hit return to quit')
        sys.exit(2)
    lxmlScrapingExamples(args[0], args[1])

if __name__ == '__main__':
    main()

Resulting Output

>lxmlScrapingExamplesMore.py http://joecodeswell.org/examples/dlwebfiles/acl_attach.htm lxmlScrapingOutput.txt
http://joecodeswell.org/examples/dlwebfiles/acl_attach.htm
lxmlScrapingOutput.txt


Example 1 - basic parsing of url


Example 5 - xpath tag with class=value
htmltree.xpath("//h1[@class='title topictitle1']")[0].text = acl attach
htmltree.xpath("//p[@class='shortdesc']")[0].text = Attaches an ACL policy to a protected object. If the protected
object already has an ACL attached, the ACL is replaced with a new
one. 
len(htmltree.xpath("//var[@class='keyword varname']")) = 4
htmltree.xpath("//var[@class='keyword varname']")[0].text = object_name


Example 6 - parent and ElementVariables
syntax_div = htmltree.xpath("//h2[@class='title sectiontitle']")[0].getparent() = <Element div at 0xb7df00>
syntax_div = <Element div at 0xb7df00>
<div class="section">
  <h2 class="title sectiontitle">Syntax</h2>
  <p class="p">
    <span class="keyword cmdname">acl attach</span> 
    <samp class="ph codeph">
      <var class="keyword varname">object_name</var>
    </samp> 
    <samp class="ph codeph">
      <var class="keyword varname">acl_name</var>
    </samp>
  </p>
 </div> 

syntax_div = <Element div at 0xb7df00>


syntax_div.xpath     //   VS  .//  


// uses  htmltree
    syntax_div.xpath("count(//samp)") = 14.0
            equals

    htmltree.xpath("count(//samp)") = 14.0
    syntax_div.xpath("count(//var)") = 4.0
            equals
    htmltree.xpath("count(//var)") = 4.0

VS   .// uses  syntax_div ONLY
    htmltree.xpath("count(.//samp)") = 14.0
    syntax_div.xpath("count(.//var)") = 2.0


In [54]: syntax_div.
syntax_div.addnext             syntax_div.get_element_by_id   syntax_div.keys
syntax_div.addprevious         syntax_div.getchildren         syntax_div.label
syntax_div.append              syntax_div.getiterator         syntax_div.make_links_absolut
syntax_div.attrib              syntax_div.getnext             syntax_div.makeelement
syntax_div.base                syntax_div.getparent           syntax_div.nsmap
syntax_div.base_url            syntax_div.getprevious         syntax_div.prefix
syntax_div.body                syntax_div.getroottree         syntax_div.remove
syntax_div.clear               syntax_div.head                syntax_div.replace
syntax_div.cssselect           syntax_div.index               syntax_div.resolve_base_href
syntax_div.drop_tag            syntax_div.insert              syntax_div.rewrite_links
syntax_div.drop_tree           syntax_div.items               syntax_div.set
syntax_div.extend              syntax_div.iter                syntax_div.sourceline
syntax_div.find                syntax_div.iterancestors       syntax_div.tag
syntax_div.find_class          syntax_div.iterchildren        syntax_div.tail
syntax_div.find_rel_links      syntax_div.iterdescendants     syntax_div.text
syntax_div.findall             syntax_div.iterfind            syntax_div.text_content
syntax_div.findtext            syntax_div.iterlinks           syntax_div.values
syntax_div.forms               syntax_div.itersiblings        syntax_div.xpath
syntax_div.get                 syntax_div.itertext



Example 7 - xpath select element by text
description_div = htmltree.xpath("//h2[@class='title sectiontitle' and text()='Description']")[0].getparent() = <Element div at 0xd10e40>
description_div = <Element div at 0xd10e40>
<div class="section">
  <h2 class="title sectiontitle">Syntax</h2>
  <p class="p">
    <span class="keyword cmdname">acl attach</span>
    <samp class="ph codeph">
      <var class="keyword varname">object_name</var></samp> 
    <samp class="ph codeph">
      <var class="keyword varname">acl_name</var>
    </samp>
  </p>
 </div> 

description_div.xpath("./p")[0].text = At most, one ACL can be attached
to a given protected object. The same ACL can be attached to multiple
protected objects. Ensure that you are familiar with ACL management before you
use this function.


Example 8 - get all text in element
see http://lxml.de/lxmlhtml.html#html-element-methods
xample_div = htmltree.xpath("//h2[@class='title sectiontitle' and text()='Example']")[0].getparent() = <Element div at 0xd10e70>
<div class="example">
  <h2 class="title sectiontitle">Example</h2>
  <div class="p">The following example attaches the ACL policy, 
    <samp class="ph codeph">pubs_acl3</samp>, 
    to the protected object, 
    <samp class="ph codeph">/Management</samp>: 
    <pre class="pre codeblock">
      <code>pdadmin sec_master> acl attach /Management pubs_acl3</code>
    </pre>
  </div>
</div>


example_div.text_content() = ExampleThe following example attaches the
ACL policy, pubs_acl3, to the protected object, /Management: pdadmin sec_master> acl attach /Management pubs_acl3




Example 9 - zipping data terms & data definitions
options_div = htmltree.xpath("//h2[@class='title sectiontitle' and text()='Options']")[0].getparent() = <Element div at 0xd10d80>
<div class="section">
  <h2 class="title sectiontitle">Options</h2>
  <dl class="dl">
    <dt class="dt dlterm">
      <samp class="ph codeph">
        <var class="keyword varname">acl_name</var>
      </samp>
    </dt>
    <dd class="dd">Specifies the ACL policy that is applied to the named object.
      The ACL policy must exist, or an error is displayed. 
      <p class="p">Examples of
      the ACL names are 
        <samp class="ph codeph">default-root</samp>, 
        <samp class="ph codeph">test</samp>, 
        <samp class="ph codeph">default-management</samp>,
        and 
        <samp class="ph codeph">pubs_acl3</samp>.</p>
    </dd>
    <dt class="dt dlterm">
      <samp class="ph codeph">
        <var class="keyword varname">object_name</var>
      </samp>
    </dt>
    <dd class="dd">Specifies the object to which to apply the named ACL policy. The
    object name must exist, or an error is displayed. 
      <p class="p">Examples of object
      names are:
      </p>
      <ul class="ul">
        <li class="li">
          <samp class="ph codeph">/Management/Groups/Travel</samp>
        </li>
        <li class="li">
          <samp class="ph codeph">/WebSEAL</samp>
        </li>
        <li class="li">
          <samp class="ph codeph">/Management</samp>
        </li>
      </ul>
    </dd>
  </dl>
</div>

term_def_dict
    object_name: Specifies the object to which to apply the named ACL policy. The
object name must exist, or an error is displayed. 
    acl_name: Specifies the ACL policy that is applied to the named object.
The ACL policy must exist, or an error is displayed. 

>

lxml HTML Scraping Syntax Examples

lxml Syntax Examples

Content:

  • Python Code
  • Resulting Output

Python Code

#!/usr/local/bin/python2.7
# -*- coding: UTF-8 -*-
"""lxmlScrapingExamples.py takes INURL [URL to an html file] Producing OUTFILEPATH [a scrapped text file]
Usage:   lxmlScrapingExamples.py INURL                                                  OUTFILEPATH
Example: lxmlScrapingExamples.py http://joecodeswell.org/examples/dlwebfiles/htmlExample.html lxmlScrapingOutput.txt
"""
import sys

# joe professional opinion: package structure a bit goofy!   :)
import lxml, lxml.html


def lxmlScrapingExamples(myinurl, myoutfilepath):
    print myinurl
    print myoutfilepath

    #Example 1 - basic parsing of url - slightly altered from: http://stackoverflow.com/a/14303564/601770
    print "\n\nExample 1 - basic parsing of url"
    htmltree = lxml.html.parse(myinurl)
    print "lxml.etree.tostring(htmltree, pretty_print=True) = %s"%(lxml.etree.tostring(htmltree, pretty_print=True))



    #Example 2 - syntax examples [css_selector, xpath] - slightly altered from: http://stackoverflow.com/a/603630/601770
    print "\n\nExample 2 - syntax examples [css_selector, xpath]"
    # joe comment - i don't know why htmltree DOESN'T WORK DIRECTLY in this example it generates error:
    #     more lxml package/module/class/function assymetry?
    '''
    File "C:\1d\PythonPjs\kivyPjs\IBMsecurityAPIclientsPj\IBMsecurityAPIclient\ngExamples.py", line 28, in lxmlScrapingExamples
        for a in mySearchTree.cssselect('tr a'):
    AttributeError: 'lxml.etree._ElementTree' object has no attribute 'cssselect'    
    '''
    #mySearchTree = htmltree
    mySearchTree = lxml.html.fromstring(lxml.etree.tostring(htmltree))         
    # Find all 'a' elements inside 'tr' table rows with css selector
    print "Find all 'a' elements inside 'tr' table rows with css selector"
    for itm in mySearchTree.cssselect('tr a'):
        print 'found "%s" link to href "%s"' % (itm.text, itm.get('href'))    
    # Find all 'a' elements inside 'tr' table rows with xpath
    print "Find all 'a' elements inside 'tr' table rows with xpath"
    for itm in mySearchTree.xpath('.//tr/*/a'):
        print 'found "%s" link to href "%s"' % (itm.text, itm.get('href'))

    #Example 3 - syntax examples [xpath, .findall(), .getchildren()] - slightly altered from: http://stackoverflow.com/a/9920703/601770
    print "\n\nExample 3 - syntax examples [xpath, .findall(), .getchildren()] "
    page = htmltree
    rows = page.xpath("body/table")[1].findall("tr")   # table [1] is the 2nd table in MY example html
    data = list()
    for row in rows:
        data.append([c.text for c in row.getchildren()])
    for itm in data[4:]: print(itm)

    #Example 4 - following sibling [] - slightly altered from: http://stackoverflow.com/questions/3139402/how-to-select-following-sibling-xml-tag-using-xpath
    print "\n\nExample 4 - following sibling []"
    sibEx = '''
    <html>
    <head>
    <title>following sibling</title>
    </head>
    <body>
    <table border>    
    <tr>
        <td class="name">Brand</td>
        <td class="desc">Intel</td>
    </tr>
    <tr>
        <td class="name">Series</td>
        <td class="desc">Core i5</td>
    </tr>
    <tr>
        <td class="name">Cores</td>
        <td class="desc">4</td>
    </tr>
    <tr>
        <td class="name">Socket</td>
        <td class="desc">LGA 1156</td>    
    </tr>

    <tr>
        <td class="name">Brand</td>
        <td class="desc">AMD</td>
    </tr>
    <tr>
        <td class="name">Series</td>
        <td class="desc">Phenom II X4</td>
    </tr>
    <tr>
        <td class="name">Cores</td>
        <td class="desc">4</td>
    </tr>
    <tr>
        <td class="name">Socket</td>
        <td class="desc">Socket AM3</td>
    </tr>
    </table>
    </body>
    </html>    
    '''
    parsedDocument = lxml.html.fromstring(sibEx)

    # bad
    #rlist = parsedDocument.xpath("tr[td[@class='name'] ='Brand']")
    #rlist = parsedDocument.xpath("tr[td[@class='name'] ='Brand']/td[@class='desc']")
    #r = parsedDocument.xpath(tr/td[@class="name"])=='Brand')
    # r = parsedDocument.tr[td[@class='name'] ='Brand'].text
    #r = parsedDocument.tr[td[@class='name'] ='Brand']/td[@class='desc'].text
    #if(parsedDocument.xpath(tr/td[@class="name"])=='Brand'):

    # good
    #print "parsedDocument.xpath('/html/body/table/tr') = %s"%(parsedDocument.xpath('/html/body/table/tr'))
    print """parsedDocument.xpath("//tr[td[@class='name'] ='Brand']/td[@class='desc']") = %s"""%(parsedDocument.xpath("//tr[td[@class='name'] ='Brand']/td[@class='desc']"))
    print """parsedDocument.xpath("//tr[td[@class='name'] ='Brand']/td[@class='desc']")[0].text = %s"""%(parsedDocument.xpath("//tr[td[@class='name'] ='Brand']/td[@class='desc']")[0].text)



    print '\n\n\n'


NUM_ARGS = 2
def main():
    args = sys.argv[1:]
    if len(args) != NUM_ARGS or "-h" in args or "--help" in args:
        print __doc__
        s = raw_input('hit return to quit')
        sys.exit(2)
    lxmlScrapingExamples(args[0], args[1])

if __name__ == '__main__':
    main()

Resulting Output

>lxmlScrapingExamples.py http://joecodeswell.org/examples/dlwebfiles/htmlExample.html lxmlScrapingOutput.txt
http://joecodeswell.org/examples/dlwebfiles/htmlExample.html
lxmlScrapingOutput.txt


Example 1 - basic parsing of url
lxml.etree.tostring(htmltree, pretty_print=True) = <!DOCTYPE html>
<html>
  <head>
    <meta http-equiv="content-type" content="text/html; charset=windows-1252"/>
    <title>lxml htmlExamples.html</title>
  </head>
  <body>
    <h1>lxml htmlExamples.html for Joe Codeswell examples - dlwebfiles</h1>

    <h2>Example 1</h2>
    <ul><li><a href="http://joecodeswell.org/examples/dlwebfiles/aveverum.mid">aveverum.mid</a></li>
      <li><a href="http://joecodeswell.org/examples/dlwebfiles/carol.mid">carol.mid</a></li>
      <li><a href="http://joecodeswell.org/examples/dlwebfiles/steiner.mid">steiner.mid</a></li>
    </ul><h2>Example 2</h2>
    <table align="left" border="0" cellspacing="0" cellpadding="0" width="100%"><tr align="left" valign="top"><th>Name</th>
        <th>File Name & Link</th>
      </tr><tr align="left" valign="top"><td>Ave Verum</td><td><a href="http://joecodeswell.org/examples/dlwebfiles/aveverum.mid">aveverum.mid</a></td></tr><tr align="left" valign="top"><td>A Carol</td><td><a href="http://joecodeswell.org/examples/dlwebfiles/carol.mid.mid">carol.mid</a></td></tr><tr align="left" valign="top"><td>Steiner Amen?</td><td><a href="http://joecodeswell.org/examples/dlwebfiles/steiner.mid">steiner.mid</a></td></tr></table><h2>Example 3</h2>
    <table border=""><tr align="LEFT"><th colspan="38">Main Subject</th>
    </tr><tr align="LEFT"><th colspan="2"> </th>

    <th valign="TOP" colspan="18">part1</th>
    <th valign="TOP" colspan="18">part2</th>
    </tr><tr align="LEFT"><th colspan="2"> </th>
    <th valign="TOP" colspan="9">sub-part1</th>
    <th valign="TOP" colspan="9">sub-part2</th>
    <th valign="TOP" colspan="9">sub-part3</th>
    <th valign="TOP" colspan="9">sub-part4</th>
    </tr><tr align="LEFT"><th colspan="2"> </th>
    <th valign="TOP" colspan="1">subject1</th>
    <th valign="TOP" colspan="1">subject2</th>

    <th valign="TOP" colspan="1">subject10</th>
    <th valign="TOP" colspan="1">subject11</th>
    <th valign="TOP" colspan="1">subject12</th>
    <th valign="TOP" colspan="1">subject13</th>
    <th valign="TOP" colspan="1">subject14</th>
    <th valign="TOP" colspan="1">subject15</th>
    <th valign="TOP" colspan="1">subject16</th>

    <th valign="TOP" colspan="1">subject17</th>
    <th valign="TOP" colspan="1">subject18</th>
    <th valign="TOP" colspan="1">subject19</th>
    <th valign="TOP" colspan="1">subject20</th>
    <th valign="TOP" colspan="1">subject21</th>
    <th valign="TOP" colspan="1">subject22</th>
    <th valign="TOP" colspan="1">subject23</th>
    <th valign="TOP" colspan="1">subject24</th>
    <th valign="TOP" colspan="1">subject25</th>

    <th valign="TOP" colspan="1">subject26</th>
    <th valign="TOP" colspan="1">subject27</th>
    <th valign="TOP" colspan="1">subject28</th>
    <th valign="TOP" colspan="1">subject29</th>
    <th valign="TOP" colspan="1">subject30</th>
    <th valign="TOP" colspan="1">subject31</th>
    <th valign="TOP" colspan="1">subject32</th>
    <th valign="TOP" colspan="1">subject33</th>
    <th valign="TOP" colspan="1">subject34</th>

    <th valign="TOP" colspan="1">subject35</th>
    <th valign="TOP" colspan="1">subject36</th>
    </tr><tr align="RIGHT"><th align="LEFT" valign="TOP" rowspan="12">2050</th>
    <th align="LEFT">January</th>
    <td>0</td>
    <td>1</td>
    <td>3</td>
    <td>0</td>

    <td>4</td>
    <td>16</td>
    <td>0</td>
    <td>6</td>
    <td>2</td>
    <td>2</td>
    <td>0</td>
    <td>3</td>
    <td>0</td>

    <td>3</td>
    <td>2</td>
    <td>0</td>
    <td>26</td>
    <td>1</td>
    <td>0</td>
    <td>0</td>
    <td>7</td>
    <td>0</td>

    <td>5</td>
    <td>6</td>
    <td>0</td>
    <td>8</td>
    <td>2</td>
    <td>0</td>
    <td>0</td>
    <td>0</td>
    <td>0</td>

    <td>0</td>
    <td>0</td>
    <td>0</td>
    <td>2</td>
    <td>0</td>
    </tr><tr align="RIGHT"><th align="LEFT">February</th>
    <td>1</td>
    <td>0</td>

    <td>8</td>
    <td>0</td>
    <td>2</td>
    <td>4</td>
    <td>1</td>
    <td>6</td>
    <td>1</td>
    <td>2</td>
    <td>0</td>

    <td>3</td>
    <td>0</td>
    <td>0</td>
    <td>4</td>
    <td>0</td>
    <td>25</td>
    <td>0</td>
    <td>0</td>
    <td>1</td>

    <td>2</td>
    <td>0</td>
    <td>4</td>
    <td>14</td>
    <td>1</td>
    <td>1</td>
    <td>0</td>
    <td>0</td>
    <td>0</td>

    <td>0</td>
    <td>0</td>
    <td>0</td>
    <td>1</td>
    <td>0</td>
    <td>0</td>
    <td>0</td>
    </tr><tr align="RIGHT"><th align="LEFT">March</th>

    <td>0</td>
    <td>0</td>
    <td>4</td>
    <td>0</td>
    <td>4</td>
    <td>7</td>
    <td>0</td>
    <td>9</td>
    <td>2</td>

    <td>1</td>
    <td>0</td>
    <td>0</td>
    <td>0</td>
    <td>2</td>
    <td>9</td>
    <td>0</td>
    <td>45</td>
    <td>1</td>

    <td>0</td>
    <td>0</td>
    <td>7</td>
    <td>0</td>
    <td>10</td>
    <td>16</td>
    <td>0</td>
    <td>5</td>
    <td>1</td>

    <td>1</td>
    <td>0</td>
    <td>1</td>
    <td>0</td>
    <td>0</td>
    <td>0</td>
    <td>0</td>
    <td>4</td>
    <td>0</td>

    </tr><tr align="RIGHT"><th align="LEFT">April</th>
    <td>1</td>
    <td>0</td>
    <td>5</td>
    <td>0</td>
    <td>3</td>
    <td>12</td>
    <td>1</td>

    <td>11</td>
    <td>0</td>
    <td>3</td>
    <td>0</td>
    <td>3</td>
    <td>0</td>
    <td>0</td>
    <td>3</td>
    <td>2</td>

    <td>34</td>
    <td>0</td>
    <td>0</td>
    <td>1</td>
    <td>2</td>
    <td>0</td>
    <td>6</td>
    <td>18</td>
    <td>1</td>

    <td>3</td>
    <td>0</td>
    <td>0</td>
    <td>0</td>
    <td>0</td>
    <td>0</td>
    <td>0</td>
    <td>0</td>
    <td>0</td>

    <td>5</td>
    <td>1</td>
    </tr><tr align="RIGHT"><th align="LEFT">May</th>
    <td>7</td>
    <td>0</td>
    <td>6</td>
    <td>0</td>
    <td>8</td>

    <td>4</td>
    <td>1</td>
    <td>13</td>
    <td>0</td>
    <td>0</td>
    <td>2</td>
    <td>2</td>
    <td>0</td>
    <td>1</td>

    <td>7</td>
    <td>1</td>
    <td>30</td>
    <td>0</td>
    <td>0</td>
    <td>0</td>
    <td>7</td>
    <td>0</td>
    <td>5</td>

    <td>12</td>
    <td>0</td>
    <td>4</td>
    <td>1</td>
    <td>0</td>
    <td>0</td>
    <td>0</td>
    <td>0</td>
    <td>0</td>

    <td>0</td>
    <td>0</td>
    <td>6</td>
    <td>1</td>
    </tr><tr align="RIGHT"><th align="LEFT">June</th>
    <td>0</td>
    <td>1</td>
    <td>14</td>

    <td>0</td>
    <td>7</td>
    <td>15</td>
    <td>0</td>
    <td>17</td>
    <td>1</td>
    <td>2</td>
    <td>0</td>
    <td>5</td>

    <td>0</td>
    <td>1</td>
    <td>3</td>
    <td>0</td>
    <td>24</td>
    <td>0</td>
    <td>0</td>
    <td>0</td>
    <td>5</td>

    <td>0</td>
    <td>6</td>
    <td>13</td>
    <td>1</td>
    <td>9</td>
    <td>1</td>
    <td>1</td>
    <td>0</td>
    <td>0</td>

    <td>0</td>
    <td>0</td>
    <td>0</td>
    <td>0</td>
    <td>2</td>
    <td>1</td>
    </tr><tr align="RIGHT"><th align="LEFT">July</th>
    <td>0</td>

    <td>1</td>
    <td>6</td>
    <td>0</td>
    <td>8</td>
    <td>17</td>
    <td>1</td>
    <td>15</td>
    <td>2</td>
    <td>1</td>

    <td>0</td>
    <td>10</td>
    <td>0</td>
    <td>2</td>
    <td>15</td>
    <td>2</td>
    <td>53</td>
    <td>0</td>
    <td>3</td>

    <td>3</td>
    <td>6</td>
    <td>0</td>
    <td>7</td>
    <td>16</td>
    <td>0</td>
    <td>9</td>
    <td>1</td>
    <td>1</td>

    <td>0</td>
    <td>0</td>
    <td>0</td>
    <td>0</td>
    <td>1</td>
    <td>0</td>
    <td>2</td>
    <td>0</td>
    </tr><tr align="RIGHT"><th align="LEFT">August</th>
    <td>2</td>
    <td>0</td>
    <td>5</td>
    <td>0</td>
    <td>8</td>
    <td>15</td>
    <td>1</td>

    <td>17</td>
    <td>0</td>
    <td>2</td>
    <td>0</td>
    <td>2</td>
    <td>0</td>
    <td>5</td>
    <td>16</td>
    <td>0</td>

    <td>33</td>
    <td>0</td>
    <td>0</td>
    <td>0</td>
    <td>11</td>
    <td>0</td>
    <td>2</td>
    <td>25</td>
    <td>4</td>

    <td>8</td>
    <td>0</td>
    <td>0</td>
    <td>0</td>
    <td>1</td>
    <td>0</td>
    <td>0</td>
    <td>0</td>
    <td>0</td>

    <td>3</td>
    <td>0</td>
    </tr><tr align="RIGHT"><th align="LEFT">September</th>
    <td>2</td>
    <td>0</td>
    <td>10</td>
    <td>0</td>
    <td>16</td>

    <td>22</td>
    <td>2</td>
    <td>19</td>
    <td>4</td>
    <td>2</td>
    <td>0</td>
    <td>0</td>
    <td>0</td>
    <td>2</td>

    <td>8</td>
    <td>0</td>
    <td>27</td>
    <td>0</td>
    <td>1</td>
    <td>0</td>
    <td>8</td>
    <td>0</td>
    <td>11</td>

    <td>31</td>
    <td>1</td>
    <td>9</td>
    <td>0</td>
    <td>0</td>
    <td>0</td>
    <td>1</td>
    <td>0</td>
    <td>0</td>

    <td>0</td>
    <td>1</td>
    <td>1</td>
    <td>0</td>
    </tr><tr align="RIGHT"><th align="LEFT">October</th>
    <td>3</td>
    <td>1</td>
    <td>8</td>

    <td>0</td>
    <td>4</td>
    <td>28</td>
    <td>0</td>
    <td>15</td>
    <td>2</td>
    <td>1</td>
    <td>0</td>
    <td>1</td>

    <td>0</td>
    <td>1</td>
    <td>6</td>
    <td>0</td>
    <td>15</td>
    <td>0</td>
    <td>1</td>
    <td>0</td>
    <td>3</td>

    <td>0</td>
    <td>9</td>
    <td>26</td>
    <td>1</td>
    <td>8</td>
    <td>4</td>
    <td>0</td>
    <td>0</td>
    <td>0</td>

    <td>0</td>
    <td>0</td>
    <td>0</td>
    <td>0</td>
    <td>1</td>
    <td>0</td>
    </tr><tr align="RIGHT"><th align="LEFT">November</th>
    <td>0</td>

    <td>3</td>
    <td>3</td>
    <td>0</td>
    <td>6</td>
    <td>23</td>
    <td>1</td>
    <td>8</td>
    <td>1</td>
    <td>2</td>

    <td>0</td>
    <td>1</td>
    <td>0</td>
    <td>3</td>
    <td>7</td>
    <td>1</td>
    <td>20</td>
    <td>0</td>
    <td>0</td>

    <td>0</td>
    <td>8</td>
    <td>0</td>
    <td>3</td>
    <td>18</td>
    <td>3</td>
    <td>7</td>
    <td>0</td>
    <td>0</td>

    <td>0</td>
    <td>0</td>
    <td>0</td>
    <td>0</td>
    <td>0</td>
    <td>0</td>
    <td>3</td>
    <td>0</td>
    </tr><tr align="RIGHT"><th align="LEFT">December</th>
    <td>1</td>
    <td>0</td>
    <td>4</td>
    <td>0</td>
    <td>4</td>
    <td>13</td>
    <td>2</td>

    <td>15</td>
    <td>1</td>
    <td>0</td>
    <td>0</td>
    <td>2</td>
    <td>0</td>
    <td>1</td>
    <td>2</td>
    <td>0</td>

    <td>29</td>
    <td>0</td>
    <td>1</td>
    <td>0</td>
    <td>7</td>
    <td>0</td>
    <td>3</td>
    <td>20</td>
    <td>1</td>

    <td>13</td>
    <td>0</td>
    <td>1</td>
    <td>0</td>
    <td>0</td>
    <td>0</td>
    <td>0</td>
    <td>0</td>
    <td>0</td>

    <td>3</td>
    <td>0</td>
    </tr></table></body>
</html>



Example 2 - syntax examples [css_selector, xpath]
Find all 'a' elements inside 'tr' table rows with css selector
found "aveverum.mid" link to href "http://joecodeswell.org/examples/dlwebfiles/aveverum.mid"
found "carol.mid" link to href "http://joecodeswell.org/examples/dlwebfiles/carol.mid.mid"
found "steiner.mid" link to href "http://joecodeswell.org/examples/dlwebfiles/steiner.mid"
Find all 'a' elements inside 'tr' table rows with xpath
found "aveverum.mid" link to href "http://joecodeswell.org/examples/dlwebfiles/aveverum.mid"
found "carol.mid" link to href "http://joecodeswell.org/examples/dlwebfiles/carol.mid.mid"
found "steiner.mid" link to href "http://joecodeswell.org/examples/dlwebfiles/steiner.mid"


Example 3 - syntax examples [xpath, .findall(), .getchildren()] 
['2050', 'January', '0', '1', '3', '0', '4', '16', '0', '6', '2', '2', '0', '3', '0', '3', '2', '0', '26', '1', '0', '0', '7', '0', '5', '6', '0', '8', '2', '0', '0', '0', '0', '0', '0', '0', '2', '0']
['February', '1', '0', '8', '0', '2', '4', '1', '6', '1', '2', '0', '3', '0', '0', '4', '0', '25', '0', '0', '1', '2', '0', '4', '14', '1', '1', '0', '0', '0', '0', '0', '0', '1', '0', '0', '0']
['March', '0', '0', '4', '0', '4', '7', '0', '9', '2', '1', '0', '0', '0', '2', '9', '0', '45', '1', '0', '0', '7', '0', '10', '16', '0', '5', '1', '1', '0', '1', '0', '0', '0', '0', '4', '0']
['April', '1', '0', '5', '0', '3', '12', '1', '11', '0', '3', '0', '3', '0', '0', '3', '2', '34', '0', '0', '1', '2', '0', '6', '18', '1', '3', '0', '0', '0', '0', '0', '0', '0', '0', '5', '1']
['May', '7', '0', '6', '0', '8', '4', '1', '13', '0', '0', '2', '2', '0', '1', '7', '1', '30', '0', '0', '0', '7', '0', '5', '12', '0', '4', '1', '0', '0', '0', '0', '0', '0', '0', '6', '1']
['June', '0', '1', '14', '0', '7', '15', '0', '17', '1', '2', '0', '5', '0', '1', '3', '0', '24', '0', '0', '0', '5', '0', '6', '13', '1', '9', '1', '1', '0', '0', '0', '0', '0', '0', '2', '1']
['July', '0', '1', '6', '0', '8', '17', '1', '15', '2', '1', '0', '10', '0', '2', '15', '2', '53', '0', '3', '3', '6', '0', '7', '16', '0', '9', '1', '1', '0', '0', '0', '0', '1', '0', '2', '0']
['August', '2', '0', '5', '0', '8', '15', '1', '17', '0', '2', '0', '2', '0', '5', '16', '0', '33', '0', '0', '0', '11', '0', '2', '25', '4', '8', '0', '0', '0', '1', '0', '0', '0', '0', '3', '0']
['September', '2', '0', '10', '0', '16', '22', '2', '19', '4', '2', '0', '0', '0', '2', '8', '0', '27', '0', '1', '0', '8', '0', '11', '31', '1', '9', '0', '0', '0', '1', '0', '0', '0', '1', '1', '0']
['October', '3', '1', '8', '0', '4', '28', '0', '15', '2', '1', '0', '1', '0', '1', '6', '0', '15', '0', '1', '0', '3', '0', '9', '26', '1', '8', '4', '0', '0', '0', '0', '0', '0', '0', '1', '0']
['November', '0', '3', '3', '0', '6', '23', '1', '8', '1', '2', '0', '1', '0', '3', '7', '1', '20', '0', '0', '0', '8', '0', '3', '18', '3', '7', '0', '0', '0', '0', '0', '0', '0', '0', '3', '0']
['December', '1', '0', '4', '0', '4', '13', '2', '15', '1', '0', '0', '2', '0', '1', '2', '0', '29', '0', '1', '0', '7', '0', '3', '20', '1', '13', '0', '1', '0', '0', '0', '0', '0', '0', '3', '0']


Example 4 - following sibling []
parsedDocument.xpath("//tr[td[@class='name'] ='Brand']/td[@class='desc']") = [<Element td at 0xda53c0>, <Element td at 0xda5390>]
parsedDocument.xpath("//tr[td[@class='name'] ='Brand']/td[@class='desc']")[0].text = Intel

>

Ways to split longer WordPress posts

Joe:

These are ways to make longer posts easier to digest for readers. They are ways to split longer posts to keep readers engaged.

Originally posted on The Daily Post:

We often think that our attention spans have grown shorter with the onslaught of digital media, but in fact longform writing — on WordPress.com and beyond — is alive and well. It’s sometimes challenging, however, to display longer pieces in a way that keeps your readers engaged.

If you’re looking for tips on presenting your latest longform creation, this post from last year, by Daily Post contributor Elizabeth, will introduce you to some nifty features built into your site. Whether you’re working on a meaty piece of prose for Blogging U.’s Writing 201 course, or just often have a lot to say, you should try these out.

Today, we’ll cover three features that can help you break up and organize longer posts, so that they display more cleanly and are easier for your readers to digest. We hope these tips come in handy!

Pagination

Longform posts are…

View original 697 more words

Easy to Understand web2py Grid Custom Search

web2py Grid Custom Search WITHOUT specifying a custom search_widget

The custom_search.html view contains the EASIER TO UNDERSTAND customization code. Here is the technique.

  1. Make the SQLFORM.grid’s Standard Search Input hidden.
  2. Define Custom Search Input elements with onchange events that send their values to the to the hidden Standard Search Input.
  3. Insert the Custom Search Input elements after the Standard Search Input (“#w2p_keywords”) using jQuery .insertAfter().
    • This prevents them from showing up on Edit or View pages.
    • Insert them in reverse order of them appearing on the page.

You can find an older version of this on web2pyslices.com

Here is the Controller code. Note the absence of a custom search_widget argument in the grid function call.

# in default.py Controller
def custom_search():
    '''
    Implements SQLFORM.grid custom search 
        WITHOUT specifying a custom search_widget,
            and so needing to read & understand the clever web2py implementation source code.
    The custom_search.html view contains the EASIER TO UNDERSTAND customization code.
    The technique:
        1. Make the grid's Standard Search Input hidden.
        2. Define Custom Search Input elements 
            with onchange events that 
                send their values to the to the hidden Standard Search Input.
    '''
    query=((db.contact.id > 0))
    fields = (db.contact.id, 
        db.contact.l_name, 
        db.contact.f_name, 
        db.contact.prime_phone,
        db.contact.date_modified,
        )

    headers = {'contact.id':   'ID',
           'contact.l_name': 'Last Name',
           'contact.f_name': 'First Name',
           'contact.prime_phone': 'Primary Phone',
           'contact.date_modified': 'Info Last Updated',
           }    
    init_sort_order=[db.contact.l_name]   

    grid = SQLFORM.grid(query=query, 
        fields=fields, 
        headers=headers, 
        orderby=init_sort_order,
        searchable=True,  
        user_signature=False, 
        create=True, deletable=False, editable=True, maxtextlength=100, paginate=25)

    return dict(grid=grid)    

Here is the View code.

<!-- In custom_search.html view -->
{{extend 'layout.html'}}
{{block head}}
{{super}}
<script>

function phoneSrch(){
    var srch ='contact.prime_phone contains '+'"'+jQuery('#joephone').val()+'"';
    $("#w2p_keywords").val(srch);
}
function lnameSrch(){
    var srch ='contact.l_name starts with '+'"'+jQuery('#joelname').val()+'"';
    $("#w2p_keywords").val(srch);
}

$(document).ready(function(){
  // Make the Grid Standard Search Input hidden  
  $("#w2p_keywords").prop("type", "hidden");   

  // Insert the Custom Search Input elements after 
  //     the Standard Search Input ("#w2p_keywords")
  //     using jQuery .insertAfter().
  //     This prevents them from showing up on Edit or View pages.
  //     Insert them in reverse order of them appearing on the page.
  var input2Str  = '<div class="joeinputclass" style="padding-bottom:10px;" >';
  input2Str += '<span class="joelabelclass" >Primary Phone contains: ';
  input2Str += '</span><input name="joephone" id="joephone" type="text" ';
  input2Str += 'onchange="phoneSrch()" style="width:150px;" ><br/></div>';
  $(input2Str).insertAfter("#w2p_keywords");
  var input1Str  = '<div class="joeinputclass" style="padding-bottom:10px;">';
  input1Str += '<span class="joelabelclass" style="padding-right:18px;" >';
  input1Str += 'Last Name starts with: </span><input name="joelname" ';
  input1Str += 'id="joelname" type="text"  onchange="lnameSrch()" ';
  input1Str += 'style="width:150px;" ></div>';
  $(input1Str).insertAfter("#w2p_keywords");
});

</script>
{{end}}
<h2>Contacts</h2>
<div id="theweb2pygrid">
{{=grid}}
</div>

Good contact Table

##Contact Table Def
Here’s a good contact table def i want to remember.
– Automatic change for Info Last Updated datetime field.
– Uses represent

db.define_table("contact",
    Field("f_name", "string",  requires=IS_NOT_EMPTY() ),
    Field("l_name", "string", requires=IS_NOT_EMPTY() ),
    Field("m_initial", "string", default=None),
    Field('prime_phone', requires=IS_NOT_EMPTY(), comment='phone eg: (650) 555-1212'),
    Field('other_phone',   comment='phone eg: (650) 555-1212'),
    Field('email', requires=IS_EMPTY_OR(IS_EMAIL()) ),    
    #Field('ca_address', 'reference ca_address', writable=False, readable=False),
    Field("address_line1", "string", requires=IS_NOT_EMPTY() ),
    Field("address_line2", "string"),
    Field("city", "string", requires=IS_NOT_EMPTY() ),
    Field("state", "string", length=2, default='CA', writable=False),
    Field("zip", "string", length=5, default=None),
    Field("date_modified", "datetime", default=datetime.datetime.now(),
        update=request.now,   # working - needed below line
        writable=False, readable=True,
    # http://strftime.org/   AND http://stackoverflow.com/questions/9678172/modify-column-output-for-sqlform-grid-in-web2py
        represent=lambda x, row: x.strftime("%c"),
        comment='Last change date of contact info.'),

      ) 

Debug web2py Actions with iPython

My Notes re debugging web2py Actions with iPython

My system

  • Using web2py 2.9.5-stable+timestamp.2014.03.16.02.35.39 (Running on Rocket 1.2.6, Python 2.7.5)
  • On Windows XP in C:\web2py

iPython log

Python 2.7.5 (default, May 15 2013, 22:43:36) [MSC v.1500 32 bit (Intel)]
Type "copyright", "credits" or "license" for more information.

IPython 0.13.1 -- An enhanced Interactive Python.
?         -> Introduction and overview of IPython's features.
%quickref -> Quick reference.
help      -> Python's own help system.
object?   -> Details about 'object', use 'object??' for extra details.

In [1]: cd C:\web2py
C:\web2py

In [2]: from gluon import shell

In [3]: shell.run('fish/default/view_or_add_client_trips/?client_id=2', plain=True, import_models=True)
{'grid': <gluon.html.DIV object at 0x0289C930>}

In [4]: