mirror of https://github.com/bvn13/soupselect.git
Added monkeypatch and unmonkeypatch, for patching a findSelect method onto the Soup class itself.
parent
9a6bf542fe
commit
2960a5f11b
|
@ -12,8 +12,6 @@ select(soup, 'div#main ul a')
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from BeautifulSoup import BeautifulSoup as Soup
|
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
tag_re = re.compile('^[a-z0-9]+$')
|
tag_re = re.compile('^[a-z0-9]+$')
|
||||||
|
@ -53,6 +51,10 @@ def attribute_checker(operator, attribute, value=''):
|
||||||
|
|
||||||
|
|
||||||
def select(soup, selector):
|
def select(soup, selector):
|
||||||
|
"""
|
||||||
|
soup should be a BeautifulSoup instance; selector is a CSS selector
|
||||||
|
specifying the elements you want to retrieve.
|
||||||
|
"""
|
||||||
tokens = selector.split()
|
tokens = selector.split()
|
||||||
current_context = [soup]
|
current_context = [soup]
|
||||||
for token in tokens:
|
for token in tokens:
|
||||||
|
@ -107,3 +109,17 @@ def select(soup, selector):
|
||||||
found.extend(context.findAll(token))
|
found.extend(context.findAll(token))
|
||||||
current_context = found
|
current_context = found
|
||||||
return current_context
|
return current_context
|
||||||
|
|
||||||
|
def monkeypatch(BeautifulSoupClass=None):
    """
    Attach ``select`` to a BeautifulSoup class as its ``findSelect`` method.

    BeautifulSoupClass is the class to patch. If you don't explicitly state
    the class to patch, defaults to the most common import location for
    BeautifulSoup (``from BeautifulSoup import BeautifulSoup``).

    Once patched, ``soup.findSelect(selector)`` on an instance calls
    ``select(soup, selector)``.
    """
    # Compare against None rather than relying on truthiness, so that an
    # explicitly supplied class is never silently swapped for the default.
    if BeautifulSoupClass is None:
        from BeautifulSoup import BeautifulSoup as BeautifulSoupClass
    BeautifulSoupClass.findSelect = select
|
||||||
|
|
||||||
|
def unmonkeypatch(BeautifulSoupClass=None):
    """
    Remove the ``findSelect`` attribute from a BeautifulSoup class.

    BeautifulSoupClass is the class to restore. If you don't explicitly state
    the class, defaults to the most common import location for BeautifulSoup
    (``from BeautifulSoup import BeautifulSoup``).

    Raises AttributeError if the class does not currently carry a
    ``findSelect`` attribute of its own.
    """
    # Compare against None rather than relying on truthiness, so that an
    # explicitly supplied class is never silently swapped for the default.
    if BeautifulSoupClass is None:
        from BeautifulSoup import BeautifulSoup as BeautifulSoupClass
    delattr(BeautifulSoupClass, 'findSelect')
|
||||||
|
|
|
@ -1,9 +1,10 @@
|
||||||
import unittest
|
import unittest
|
||||||
from BeautifulSoup import BeautifulSoup
|
from BeautifulSoup import BeautifulSoup
|
||||||
|
|
||||||
from soupselect import select
|
from soupselect import select, monkeypatch, unmonkeypatch
|
||||||
|
|
||||||
class TestBasicSelectors(unittest.TestCase):
|
class BaseTest(unittest.TestCase):
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.soup = BeautifulSoup(HTML)
|
self.soup = BeautifulSoup(HTML)
|
||||||
|
|
||||||
|
@ -22,6 +23,8 @@ class TestBasicSelectors(unittest.TestCase):
|
||||||
def assertSelectMultiple(self, *tests):
|
def assertSelectMultiple(self, *tests):
|
||||||
for selector, expected_ids in tests:
|
for selector, expected_ids in tests:
|
||||||
self.assertSelect(selector, expected_ids)
|
self.assertSelect(selector, expected_ids)
|
||||||
|
|
||||||
|
class TestBasicSelectors(BaseTest):
|
||||||
|
|
||||||
def test_one_tag_one(self):
|
def test_one_tag_one(self):
|
||||||
els = select(self.soup, 'title')
|
els = select(self.soup, 'title')
|
||||||
|
@ -91,6 +94,8 @@ class TestBasicSelectors(unittest.TestCase):
|
||||||
'.class3', 'p.class3', 'html p.class2', 'div#inner .class2'):
|
'.class3', 'p.class3', 'html p.class2', 'div#inner .class2'):
|
||||||
self.assertSelects(selector, ['pmulti'])
|
self.assertSelects(selector, ['pmulti'])
|
||||||
|
|
||||||
|
class TestAttributeSelectors(BaseTest):
|
||||||
|
|
||||||
def test_attribute_equals(self):
|
def test_attribute_equals(self):
|
||||||
self.assertSelectMultiple(
|
self.assertSelectMultiple(
|
||||||
('p[class="onep"]', ['p1']),
|
('p[class="onep"]', ['p1']),
|
||||||
|
@ -197,6 +202,54 @@ class TestBasicSelectors(unittest.TestCase):
|
||||||
('p[blah]', []),
|
('p[blah]', []),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
class TestMonkeyPatch(BaseTest):
    """Check that monkeypatch/unmonkeypatch add and remove ``findSelect``."""

    def assertSelectMultipleExplicit(self, soup, *tests):
        """
        For each (selector, expected_ids) pair, run the selector through
        soup.findSelect and compare the ids of the matched elements with
        expected_ids, ignoring order.
        """
        for selector, expected_ids in tests:
            # sorted() builds new lists, so the caller's expected_ids is not
            # reordered in place.
            el_ids = sorted(el['id'] for el in soup.findSelect(selector))
            expected_ids = sorted(expected_ids)
            self.assertEqual(expected_ids, el_ids,
                "Selector %s, expected [%s], got [%s]" % (
                    selector, ', '.join(expected_ids), ', '.join(el_ids)
                )
            )

    def assertPatchRoundTrip(self, *patch_args):
        """
        Patch, exercise findSelect, then unpatch.

        patch_args are forwarded unchanged to both monkeypatch and
        unmonkeypatch, so passing nothing exercises their default class.
        """
        soup = BeautifulSoup(HTML)
        # NOTE(review): presumably an unpatched soup resolves findSelect to
        # something non-callable, hence TypeError -- confirm against
        # BeautifulSoup's attribute lookup.
        self.assertRaises(TypeError, soup.findSelect, '*')

        monkeypatch(*patch_args)
        try:
            self.assertTrue(soup.findSelect('*'))
            self.assertSelectMultipleExplicit(soup,
                ('link', ['l1']),
                ('div#main', ['main']),
                ('div div', ['inner']),
            )
        finally:
            # Always undo the patch, even when an assertion above fails, so a
            # patched class cannot leak into the remaining tests.
            unmonkeypatch(*patch_args)

        self.assertRaises(TypeError, soup.findSelect, '*')

    def test_monkeypatch_explicit(self):
        self.assertPatchRoundTrip(BeautifulSoup)

    def test_monkeypatch_implicit(self):
        self.assertPatchRoundTrip()
|
||||||
|
|
||||||
|
|
||||||
HTML = """
|
HTML = """
|
||||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
|
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
|
||||||
"http://www.w3.org/TR/html4/strict.dtd">
|
"http://www.w3.org/TR/html4/strict.dtd">
|
||||||
|
|
Loading…
Reference in New Issue