2017-05-08 2 views
3

Je suis en train de jouer avec BeautifulSoup et je cherche un moyen d'extraire une chaîne JSON spécifique d'un élément JS.

BeautifulSoup - extraire du JSON depuis du JS

Voici le JS:

<script>window.pinball = window.pinball || []; 
window.pinball.push(['add', {"srp_cleanup":"inactive","book_visit":"inactive","my_visits":"inactive"}]); 
window.Rent = window.Rent || {}; 
window.Rent.zutron = {"error_div":".js-generic-error","host":"rent","user_type":null,"zid":null,"origin":null,"provider":null}; 
window.Rent.book_visit = {"book_visit_host":"http://bookavisit.prod.services.rentpath.com"} 
window.Rent.tagging = {"tealium":{"env":"prod","profile":"tealium.rent.com","account":"rentpath"}}; 
window.Rent.realm = "rent"; 
window.Rent.data = {"floorplans":{"1159255":{"availability":"1 Unit Available","availability_class":"floorplan-available-now","unitstyle":"aa1- 1 Bed/1 Bath","deposit":"","floorplan_id":1159255,"bed":"1 bed","listing_id":"571535","bath":"1 bath","sqft":"763 sqft","rent":"$1950 - $2322 /mo","propertyname":"Reading Commons","fp3dunfurnished":"http://image.rent.com/imgr/52ad5930427b3e739676240c01b7d6cc/650-","fp3dfurnished":"http://image1.rent.com/imgr/07733fbd8c8a6a9134d5e0af77d52cb2/650-","floorplanimage":"http://image.rent.com/imgr/44c2395728fa733c2682506d96ec68f5/650-"},"1159257":{"availability":"2 Units Available","availability_class":"floorplan-available-now","unitstyle":"aa3- 1 Bed/1 Bath","deposit":"","floorplan_id":1159257,"bed":"1 bed","listing_id":"571535","bath":"1 bath","sqft":"893 sqft","rent":"$1995 - $2531 /mo","propertyname":"Reading Commons","fp3dunfurnished":"http://image.rent.com/imgr/187753b2e7e6beb5aaf8602514361d89/650-","fp3dfurnished":"http://image.rent.com/imgr/55673aa4253387f0d06aa02495ccf2bc/650-","floorplanimage":"http://image.rent.com/imgr/389adb5ac1fa61c56aa04c88fe97c02f/650-"},"1159259":{"availability":"UNAVAILABLE","availability_class":"floorplan-available-later","unitstyle":"aa5- 1 Bed/1 Bath","deposit":"","floorplan_id":1159259,"bed":"1 bed","listing_id":"571535","bath":"1 bath","sqft":"899 sqft","rent":"Contact for Pricing","propertyname":"Reading Commons","floorplanimage":"http://image.rent.com/imgr/24059a4611740bd58436236758d65e20/650-"},"1159256":{"availability":"UNAVAILABLE","availability_class":"floorplan-available-later","unitstyle":"aa2- 1 Bed/1 Bath","deposit":"","floorplan_id":1159256,"bed":"1 bed","listing_id":"571535","bath":"1 bath","sqft":"880 sqft","rent":"Contact for Pricing","propertyname":"Reading Commons","floorplanimage":"http://image1.rent.com/imgr/0854a95e69c0b75ee0b13c41db2f31f1/650-"},"1159258":{"availability":"UNAVAILABLE","availability_class":"floorplan-available-later","unitstyle":"aa4- 1 Bed/1 
Bath","deposit":"","floorplan_id":1159258,"bed":"1 bed","listing_id":"571535","bath":"1 bath","sqft":"897 sqft","rent":"Contact for Pricing","propertyname":"Reading Commons","floorplanimage":"http://image1.rent.com/imgr/deb3efc9ee3933a0a1b4844d886b7a8a/650-"},"1159262":{"availability":"UNAVAILABLE","availability_class":"floorplan-available-later","unitstyle":"bc3- 2 Bed/2 Bath","deposit":"","floorplan_id":1159262,"bed":"2 beds","listing_id":"571535","bath":"2 baths","sqft":"1194 sqft","rent":"Contact for Pricing","propertyname":"Reading Commons","floorplanimage":"http://image1.rent.com/imgr/a1fff6050e86f98b7249b843cd6f0836/650-"},"1159263":{"availability":"UNAVAILABLE","availability_class":"floorplan-available-later","unitstyle":"bc4- 2 Bed/2 Bath","deposit":"","floorplan_id":1159263,"bed":"2 beds","listing_id":"571535","bath":"2 baths","sqft":"1201 sqft","rent":"Contact for Pricing","propertyname":"Reading Commons","fp3dunfurnished":"http://image1.rent.com/imgr/33e2bb30c9aa1fcdbbf8ce4882a18fcd/650-","fp3dfurnished":"http://image.rent.com/imgr/c4d4df83e18f2b12c8cae6dab523769b/650-","floorplanimage":"http://image.rent.com/imgr/11ac88f52ca904e7646e03b6791f8455/650-"},"1159266":{"availability":"UNAVAILABLE","availability_class":"floorplan-available-later","unitstyle":"bc7- 2 Bed/2 Bath","deposit":"","floorplan_id":1159266,"bed":"2 beds","listing_id":"571535","bath":"2 baths","sqft":"1461 sqft","rent":"Contact for Pricing","propertyname":"Reading Commons","fp3dunfurnished":"http://image.rent.com/imgr/0a3887c07a7bc05670a826cd5562c49d/650-","fp3dfurnished":"http://image.rent.com/imgr/efa94735904b40ba463cbd26bc5504cf/650-","floorplanimage":"http://image1.rent.com/imgr/36413f72b93f0b0ed2f4f89337ef719d/650-"},"1159264":{"availability":"UNAVAILABLE","availability_class":"floorplan-available-later","unitstyle":"bc5- 2 Bed/2 Bath","deposit":"","floorplan_id":1159264,"bed":"2 beds","listing_id":"571535","bath":"2 baths","sqft":"1325 sqft","rent":"Contact for 
Pricing","propertyname":"Reading Commons","floorplanimage":"http://image.rent.com/imgr/ce1627742dbca97cc44d726b1d906fc3/650-"},"1159267":{"availability":"UNAVAILABLE","availability_class":"floorplan-available-later","unitstyle":"bcl1-2 Bed/2 Bath","deposit":"","floorplan_id":1159267,"bed":"2 beds","listing_id":"571535","bath":"2 baths","sqft":"1500 sqft","rent":"Contact for Pricing","propertyname":"Reading Commons","fp3dunfurnished":"http://image.rent.com/imgr/a5888b34db510f6932af116e5197ce0c/650-","fp3dfurnished":"http://image1.rent.com/imgr/68f33736e29613562d9a5618eec1a4c6/650-","floorplanimage":"http://image1.rent.com/imgr/d7a833b56639b121178ddc86ac074754/650-"},"1159261":{"availability":"UNAVAILABLE","availability_class":"floorplan-available-later","unitstyle":"bc2- 2 Bed/2 Bath","deposit":"","floorplan_id":1159261,"bed":"2 beds","listing_id":"571535","bath":"2 baths","sqft":"1187 sqft","rent":"Contact for Pricing","propertyname":"Reading Commons","fp3dunfurnished":"http://image.rent.com/imgr/33e2bb30c9aa1fcdbbf8ce4882a18fcd/650-","fp3dfurnished":"http://image.rent.com/imgr/c4d4df83e18f2b12c8cae6dab523769b/650-","floorplanimage":"http://image1.rent.com/imgr/11ac88f52ca904e7646e03b6791f8455/650-"},"1159265":{"availability":"UNAVAILABLE","availability_class":"floorplan-available-later","unitstyle":"bc6- 2 Bed/2 Bath","deposit":"","floorplan_id":1159265,"bed":"2 beds","listing_id":"571535","bath":"2 baths","sqft":"1400 sqft","rent":"Contact for Pricing","propertyname":"Reading Commons","fp3dunfurnished":"http://image.rent.com/imgr/3f80d6e4386db5f450a6750c1a537b84/650-","fp3dfurnished":"http://image1.rent.com/imgr/f54aefd699a9ed3f1d8b6fb8e4ce1500/650-","floorplanimage":"http://image1.rent.com/imgr/b78bda34547615be4973da38dbd9a10f/650-"},"1159260":{"availability":"UNAVAILABLE","availability_class":"floorplan-available-later","unitstyle":"bc1- 2 Bed/2 Bath","deposit":"","floorplan_id":1159260,"bed":"2 beds","listing_id":"571535","bath":"2 baths","sqft":"1121 
sqft","rent":"Contact for Pricing","propertyname":"Reading Commons","fp3dunfurnished":"http://image1.rent.com/imgr/3b4e4306d4cc2317bd271888532405a0/650-","fp3dfurnished":"http://image1.rent.com/imgr/8ca6a08b9c4eed76575520b4f1dcc03c/650-","floorplanimage":"http://image.rent.com/imgr/f25bcd28009d72a91f02d4e125340b65/650-"},"1159268":{"availability":"1 Unit Available","availability_class":"floorplan-available-now","unitstyle":"cdta1- 3 Bed/3 Bath Office TH","deposit":"","floorplan_id":1159268,"bed":"3 beds","listing_id":"571535","bath":"3 baths","sqft":"2100 sqft","rent":"$3798 - $5073 /mo","propertyname":"Reading Commons","fp3dunfurnished":"http://image1.rent.com/imgr/82ba57c2f1be5071c3d5f48a79c9d45e/650-","fp3dfurnished":"http://image.rent.com/imgr/bc7908ca722b6f9407a247ebf7af49bd/650-","floorplanimage":"http://image.rent.com/imgr/3c881fbe1aba5ba7be68ca6399e7daa3/650-"},"1159269":{"availability":"1 Unit Available","availability_class":"floorplan-available-now","unitstyle":"cdta2- 3 Bed/3 Bath Office TH","deposit":"","floorplan_id":1159269,"bed":"3 beds","listing_id":"571535","bath":"3 baths","sqft":"2310 sqft","rent":"$3908 - $4995 /mo","propertyname":"Reading Commons","fp3dunfurnished":"http://image1.rent.com/imgr/86b5248dfbaef2534218a8bdb724d93e/650-","fp3dfurnished":"http://image.rent.com/imgr/ee01414c664925a3463bad279f943363/650-","floorplanimage":"http://image.rent.com/imgr/ba58885223be2f4f8bfd1588d9ddca9e/650-"}},"reviews":{"startingrecordnumber":1,"totalnumberofmatchingrecords":18,"numberofrecordsreturned":10,"numberofpages":2,"endingrecordnumber":10,"pagenumber":1,"numberofrecordsperpage":10},"listing":{"id":"571535","name":"Reading Commons","address_full":"7 Archstone Circle, Reading, MA 01867","phone_desktop":"(781) 205-2341","visits_enabled":true}}; 
window.Rent.mapbox_api_key = "pk.eyJ1IjoibmhnbWFwYm94IiwiYSI6ImNpb2VrYW5uazAwbHp5OG0yYmp6bms5bjYifQ.4RylIPWDNDEie2NreUsbig"; 
window.Rent.asset_host = "rent.assets.rentpathcdn.com"; 

window.zutron_host = "http://zutron.primedia.com"; 
window.ONESEARCH_URL = "http://onesearch.svc.primedia.com"; 

window.Rent.pageType = "pdp"; 

// these two globals are used in onesearch.js, not sure where else 
window.channel = "apartments"; 
window.APPLICATION = "rent"; 

window.googletag = window.googletag || {}; 
window.googletag.cmd = window.googletag.cmd || []; 

// SID is used by the Moving Leads Service 
window.Rent.MOVING_LEADS_SID = 96;</script> 

J'ai pu récupérer le JS via BeautifulSoup et je cherche maintenant à obtenir la chaîne JSON affectée à window.Rent.data.

Y a-t-il un moyen de le faire sans avoir à recourir au module re ?

Répondre

2

L'idée est d'utiliser une expression régulière avec un groupe de capture. On s'en sert d'abord pour localiser l'élément script d'après son texte, puis pour extraire la sous-chaîne JSON du script lui-même. Enfin, json.loads() charge cette chaîne JSON dans un objet Python :

import json 
import re 

from bs4 import BeautifulSoup 

data = """ 
your HTML here""" 

soup = BeautifulSoup(data, "html.parser")

# Capture the object literal assigned to window.Rent.data.
#  - The dots are escaped so they match literally instead of any character.
#  - Whitespace around "=" and around the terminating ";" is optional, so a
#    trailing space after the semicolon (or a script that ends without a
#    final newline) no longer defeats the match.
#  - DOTALL lets the non-greedy capture span a literal that wraps over
#    several lines; MULTILINE makes "$" match at the end of the statement's
#    line rather than only at the end of the whole script.
pattern = re.compile(
    r"window\.Rent\.data\s*=\s*(\{.*?\})\s*;\s*$",
    re.DOTALL | re.MULTILINE,
)

# `string=` is the current name of the argument formerly spelled `text=`;
# it selects the <script> whose .string matches the pattern.
script = soup.find("script", string=pattern)
if script is None:
    # Fail with a readable message instead of an AttributeError on None.
    raise ValueError("no <script> element assigns window.Rent.data")

# find(string=...) only returns tags whose .string matched, so the search
# below is guaranteed to succeed on the same text.
raw_json = pattern.search(script.string).group(1)
data = json.loads(raw_json)
print(data)

Il existe aussi une autre approche : utiliser un analyseur JavaScript. J'ai expérimenté slimit à plusieurs reprises sur StackOverflow ; cela vaut la peine d'y jeter un œil.