lxml insights

Posted again because WordPress is BBBBBaddddd at posting code!

I GIVE UP!!! I need to find someplace else to post code!

I am VERY SAD!

# "wordpress.com" "posting source code" "2021"  - Google Search
#   https://wordpress.com/tag/programming
# wordpress.com >> editor >> Block >> [HTML, HTML]
import sys, lxml.html, lxml.cssselect

# For more insights see [The lxml.etree Tutorial](https://lxml.de/tutorial.html)

def main():
    """Demonstrate a few lxml basics on the module-level ``html_fragment``.

    Parses the fragment and prints, in order: the serialized tree, child
    counts at several levels, the four sub-header title elements, and two
    ``class`` attribute values.  The trailing comments record the output
    the author observed for this fragment.
    """
    root = lxml.html.fromstring(html_fragment)
    # type(root) => lxml.html.HtmlElement
    serialize = lxml.html.tostring

    # There is no element.text() method to call (``.text`` is a plain
    # attribute); to see an element's markup use lxml.html.tostring(element).
    print(serialize(root))  # =>  b'<div><div class="chart_header">\n ...'

    # [Elements are lists](https://lxml.de/tutorial.html#elements-are-lists)
    # len() counts an element's children and indexing returns a child.
    print(len(root))          # 3
    print(len(root[0]))       # 3
    subheader = root[1]
    print(len(subheader))     # 4
    print(len(subheader[0]))  # 0

    # Serialize each of the four column titles; observed output:
    #   b'<div class="chart_tabletitle1">CPU</div>    '
    #   b'<div class="chart_tabletitle2">CPU Value (Mark/Price)</div>    '
    #   b'<div class="chart_tabletitle4">CPU Mark</div>    '
    #   b'<div class="chart_tabletitle3">Price (USD)</div>  '
    for position in range(4):
        print(serialize(subheader[position]))

    # [Elements carry attributes as a dict](https://lxml.de/tutorial.html#elements-carry-attributes-as-a-dict)
    # Observed output: chart_tabletitle4, then chart_tabletitle3.
    for position in (2, 3):
        print(subheader[position].attrib['class'])




# Sample markup parsed by main(): an excerpt of the PassMark "High End CPUs"
# chart (its origin is noted in the HTML comment at the top of the string).
# NOTE(review): the closing </div> for "chart_body" is not part of this
# excerpt -- presumably the HTML parser copes with that; confirm if it matters.
html_fragment = '''
<!-- at about line 2030 in view-source:https://www.cpubenchmark.net/high_end_cpus.html -->

  <div class="chart_header">
    <div class="chart_title">PassMark - CPU Mark</div>
    <div class="chart_subtitle">High End CPUs</div>
    <div class="chart_subtitle" style="font-size: small;">Updated 31st of May 2021</div>
  </div>
  <div class="chart_subheader">
    <div class="chart_tabletitle1">CPU</div>    <div class="chart_tabletitle2">CPU Value (Mark/Price)</div>    <div class="chart_tabletitle4">CPU Mark</div>    <div class="chart_tabletitle3">Price (USD)</div>  </div>
  <div class="chart_body">
    <ul class="chartlist">
<li id="pk4207"><span class="more_details" onclick="p(event, '87,767', 1, 12, 64, 2, 'NA');"><a class="name" href="cpu.php?cpu=AMD+EPYC+7763&amp;id=4207"></a></span><a href="cpu.php?cpu=AMD+EPYC+7763&amp;id=4207"><span class="prdname">AMD EPYC 7763</span><div><span class="index pink" style="width: 0%">(0%)</span></div><span class="count">NA</span><span class="mark-neww">87,767</span><span class="price-neww">NA</span></a></li>
<li id="pk3837"><span class="more_details" onclick="p(event, '86,096', 2, 19, 64, 2, '$5,489.99');"><a class="name" href="cpu.php?cpu=AMD+Ryzen+Threadripper+PRO+3995WX&amp;id=3837"></a></span><a href="cpu.php?cpu=AMD+Ryzen+Threadripper+PRO+3995WX&amp;id=3837"><span class="prdname">AMD Ryzen Threadripper PRO 3995WX</span><div><span class="index yellow" style="width: 46.5%">(46.5%)</span></div><span class="count">15.7</span><span class="mark-neww">86,096</span><span class="price-neww">$5,489.99</span></a></li>
<li id="pk4206"><span class="more_details" onclick="p(event, '85,887', 3, 16, 64, 2, 'NA');"><a class="name" href="cpu.php?cpu=AMD+EPYC+7713&amp;id=4206"></a></span><a href="cpu.php?cpu=AMD+EPYC+7713&amp;id=4206"><span class="prdname">AMD EPYC 7713</span><div><span class="index green" style="width: 0%">(0%)</span></div><span class="count">NA</span><span class="mark-neww">85,887</span><span class="price-neww">NA</span></a></li>
<li id="pk3674"><span class="more_details" onclick="p(event, '81,206', 4, 112, 64, 2, '$5,729.00*');"><a class="name" href="cpu.php?cpu=AMD+Ryzen+Threadripper+3990X&amp;id=3674"></a></span><a href="cpu.php?cpu=AMD+Ryzen+Threadripper+3990X&amp;id=3674"><span class="prdname">AMD Ryzen Threadripper 3990X</span><div><span class="index light-purple" style="width: 44.8%">(44.8%)</span></div><span class="count">14.2</span><span class="mark-neww">81,206</span><span class="price-neww">$5,729.00*</span></a></li>
<li id="pk4205"><span class="more_details" onclick="p(event, '77,101', 5, 3, 48, 2, 'NA');"><a class="name" href="cpu.php?cpu=AMD+EPYC+7643&amp;id=4205"></a></span><a href="cpu.php?cpu=AMD+EPYC+7643&amp;id=4205"><span class="prdname">AMD EPYC 7643</span><div><span class="index red" style="width: 0%">(0%)</span></div><span class="count">NA</span><span class="mark-neww">77,101</span><span class="price-neww">NA</span></a></li>
<li id="pk3719"><span class="more_details" onclick="p(event, '71,686', 6, 14, 64, 2, '$8,499.00');"><a class="name" href="cpu.php?cpu=AMD+EPYC+7702&amp;id=3719"></a></span><a href="cpu.php?cpu=AMD+EPYC+7702&amp;id=3719"><span class="prdname">AMD EPYC 7702</span><div><span class="index turquoise" style="width: 36.0%">(36.0%)</span></div><span class="count">8.4</span><span class="mark-neww">71,686</span><span class="price-neww">$8,499.00</span></a></li>
<li id="pk3555"><span class="more_details" onclick="p(event, '68,213', 7, 2, 64, 2, '$4,650.00*');"><a class="name" href="cpu.php?cpu=AMD+EPYC+7702P&amp;id=3555"></a></span><a href="cpu.php?cpu=AMD+EPYC+7702P&amp;id=3555"><span class="prdname">AMD EPYC 7702P</span><div><span class="index orange" style="width: 45.4%">(45.4%)</span></div><span class="count">14.7</span><span class="mark-neww">68,213</span><span class="price-neww">$4,650.00*</span></a></li>
</ul>
'''
# Run the demo only when this file is executed as a script.
if __name__ == '__main__':
    main()

#lxml, #python