I'm trying to get the image from a website's detail page. I'm using the RSS 'links' field to get the links, but when I try to scrape the detail page for the image I get an error. This is my code:
@app.task
def pan_task():
    url = 'http://feeds.example.com/reuters/technologyNews'
    name = 'noticiassin'
    live_leaks = [i for i in feedparser.parse(url).entries][:10]
    the_count = len(live_leaks)
    ky = feedparser.parse(url).keys()
    oky = [i.keys() for i in feedparser.parse(url).entries][1]  # shows what I can pull

    def make_soup(url):
        def swappo():
            user_one = ' "Mozilla/5.0 (Windows NT 6.0; WOW64; rv:24.0) Gecko/20100101 Firefox/24.0" '
            user_two = ' "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5)" '
            user_thr = ' "Mozilla/5.0 (Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko" '
            user_for = ' "Mozilla/5.0 (Macintosh; Intel Mac OS X x.y; rv:10.0) Gecko/20100101 Firefox/10.0" '
            agent_list = [user_one, user_two, user_thr, user_for]
            a = random.choice(agent_list)
            return a

        headers = {
            "user-agent": swappo(),
            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "accept-charset": "ISO-8859-1,utf-8;q=0.7,*;q=0.3",
            "accept-encoding": "gzip,deflate,sdch",
            "accept-language": "en-US,en;q=0.8",
        }
        the_comments_page = requests.get(url, headers=headers)
        soupdata = BeautifulSoup(the_comments_page.text, 'html5lib')
        # comment = soupdata.find('a').get('src')
        # para = comment.find_all('p')
        # kids = [child.text for child in para]
        # blu = str(kids).strip('[]')
        return soupdata

    try:
        live_entries = [{'href': live_leak.links[0]['href']} for live_leak in live_leaks]
        o = make_soup(live_entries)
    except IndexError:
        print('error check logs')
        live_entries = []
    return print(o)
But when I run it, I get this error:
[2016-10-07 21:10:58,019: ERROR/MainProcess] Task blog.tasks.pan_task[f43ed360-c06e-4a4b-95ab-4f44a4564afa] raised unexpected: InvalidSchema("No connection adapters were found for '[{'href': 'http://feeds.reuters.com/~r/reuters/technologyNews/~3/AA1uAIpygjQ/us-apple-samsung-elec-appeal-idUSKCN1271LF'}, {'href': 'http://feeds.reuters.com/~r/reuters/technologyNews/~3/Nz28cqiuS0Y/us-google-pixel-advertising-idUSKCN12721U'}, {'href': 'http://feeds.reuters.com/~r/reuters/technologyNews/~3/POLoFj22hc4/us-yahoo-nsa-order-idUSKCN12800D'}, {'href': 'http://feeds.reuters.com/~r/reuters/technologyNews/~3/eF-XlhlQl-s/us-fcc-dataservices-idUSKCN1271RB'}, {'href': 'http://feeds.reuters.com/~r/reuters/technologyNews/~3/hNf9IQ3rXjw/us-autonomous-nauto-idUSKCN1271FX'}, {'href': 'http://feeds.reuters.com/~r/reuters/technologyNews/~3/NXkk5WfWVhM/us-sony-sensors-idUSKCN1270EC'}, {'href': 'http://feeds.reuters.com/~r/reuters/technologyNews/~3/gdBvoarqQro/us-yahoo-discrimination-lawsuit-idUSKCN12800K'}, {'href': 'http://feeds.reuters.com/~r/reuters/technologyNews/~3/nt8K--27bDg/us-thomsonreuters-ceo-idUSKCN1271DQ'}, {'href': 'http://feeds.reuters.com/~r/reuters/technologyNews/~3/f8z3eQg2Fpo/us-snapchat-ipo-idUSKCN12627S'}, {'href': 'http://feeds.reuters.com/~r/reuters/technologyNews/~3/rr4vdLsC11Y/us-samsung-elec-results-idUSKCN1262NO'}]'",)
Traceback (most recent call last):
File "/Users/ray/Desktop/myheroku/practice/lib/python3.5/site-packages/celery/app/trace.py", line 240, in trace_task
R = retval = fun(*args, **kwargs)
File "/Users/ray/Desktop/myheroku/practice/lib/python3.5/site-packages/celery/app/trace.py", line 438, in __protected_call__
return self.run(*args, **kwargs)
File "/Users/ray/Desktop/myheroku/practice/src/blog/tasks.py", line 134, in pan_task
o = make_soup(live_entries)
File "/Users/ray/Desktop/myheroku/practice/src/blog/tasks.py", line 124, in make_soup
the_comments_page = requests.get(url, headers=headers)
File "/Users/ray/Desktop/myheroku/practice/lib/python3.5/site-packages/requests/api.py", line 67, in get
return request('get', url, params=params, **kwargs)
File "/Users/ray/Desktop/myheroku/practice/lib/python3.5/site-packages/requests/api.py", line 53, in request
return session.request(method=method, url=url, **kwargs)
File "/Users/ray/Desktop/myheroku/practice/lib/python3.5/site-packages/requests/sessions.py", line 468, in request
resp = self.send(prep, **send_kwargs)
File "/Users/ray/Desktop/myheroku/practice/lib/python3.5/site-packages/requests/sessions.py", line 570, in send
adapter = self.get_adapter(url=request.url)
File "/Users/ray/Desktop/myheroku/practice/lib/python3.5/site-packages/requests/sessions.py", line 644, in get_adapter
raise InvalidSchema("No connection adapters were found for '%s'" % url)
requests.exceptions.InvalidSchema: No connection adapters were found for '[{'href': 'http://feeds.reuters.com/~r/reuters/technologyNews/~3/AA1uAIpygjQ/us-apple-samsung-elec-appeal-idUSKCN1271LF'}, {'href': 'http://feeds.reuters.com/~r/reuters/technologyNews/~3/Nz28cqiuS0Y/us-google-pixel-advertising-idUSKCN12721U'}, {'href': 'http://feeds.reuters.com/~r/reuters/technologyNews/~3/POLoFj22hc4/us-yahoo-nsa-order-idUSKCN12800D'}, {'href': 'http://feeds.reuters.com/~r/reuters/technologyNews/~3/eF-XlhlQl-s/us-fcc-dataservices-idUSKCN1271RB'}, {'href': 'http://feeds.reuters.com/~r/reuters/technologyNews/~3/hNf9IQ3rXjw/us-autonomous-nauto-idUSKCN1271FX'}, {'href': 'http://feeds.reuters.com/~r/reuters/technologyNews/~3/NXkk5WfWVhM/us-sony-sensors-idUSKCN1270EC'}, {'href': 'http://feeds.reuters.com/~r/reuters/technologyNews/~3/gdBvoarqQro/us-yahoo-discrimination-lawsuit-idUSKCN12800K'}, {'href': 'http://feeds.reuters.com/~r/reuters/technologyNews/~3/nt8K--27bDg/us-thomsonreuters-ceo-idUSKCN1271DQ'}, {'href': 'http://feeds.reuters.com/~r/reuters/technologyNews/~3/f8z3eQg2Fpo/us-snapchat-ipo-idUSKCN12627S'}, {'href': 'http://feeds.reuters.com/~r/reuters/technologyNews/~3/rr4vdLsC11Y/us-samsung-elec-results-idUSKCN1262NO'}]'
Why doesn't this work? I use the same function in another program.
It takes a single URL, but you're passing it a list of dictionaries. – miah
I used this function in a similar program. The only difference is that it didn't use feedparser to get the URL. How can I make it work? – losee
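The traceback shows `requests.get` receiving the entire `live_entries` list as its URL, which is why requests raises `InvalidSchema`. A minimal sketch of one way around it, assuming `feedparser` is imported and the question's `make_soup` helper and `live_leaks` list are defined as above, is to extract each `href` string and call `make_soup` once per URL:

    # Sketch: give make_soup() a single URL string per call instead of the whole list.
    # Assumes make_soup() and live_leaks are defined as in the question's pan_task().
    soups = []
    for live_leak in live_leaks:
        try:
            href = live_leak.links[0]['href']  # one URL string per feed entry
        except (IndexError, KeyError):
            continue  # skip entries without a usable link
        soups.append(make_soup(href))  # make_soup/requests.get expect a single URL

    for soup in soups:
        print(soup.find('img'))  # e.g. inspect the first image tag on each detail page

Each call then hits one article page, and the image (or any other element) can be pulled from the resulting soup object per page rather than from the list as a whole.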