如何在python中在网站上抓取动态javascript内容?

时间:2019-02-24 15:28:29

标签: javascript python web-scraping

我需要用python抓取此网站(https://www.winamax.fr/paris-sportifs/live),以获取最新赔率并将其保存在csv文件中,但这些内容是由JavaScript动态更新的(AJAX?我对js不太熟悉...)。我尝试过Selenium,但没有效果。

我已经找到了处理此页面的js脚本(https://static.winamax.fr/betting/client/1.31.1/12.main.js),该脚本使用webpackJsonp模块,因此我猜数据为json格式(如果我说错了,请纠正我;再说一次,我对javascript不太熟悉)。

该脚本显示了所有信息,但确实很混乱,我只能读取标题(例如“现场直播”,“即将发生的事件”,“收藏夹”等),而读不到比赛中的实际数据。以下是原始版本(一大行代码,这就是为什么通过上面提供的链接阅读起来更容易消化的原因):

/*
 * Minified webpack chunk: pushes chunk id 12, containing modules 696 and 837,
 * onto window.webpackJsonp.
 *
 * Module 696 (exported as `e.a`): a factory taking a page kind ("home", "live",
 *   "tv", "favorites", "catalog", "calendar") and returning a function that
 *   builds a memoized selector. The selector reads `state.entities` (matches,
 *   sports, categories, tournaments, grids) plus the route params
 *   (sportId, categoryId, tournamentId, hour) and location.pathname, and
 *   returns `{list, isEmpty, isLoading, isNull}`. `list` holds entries of type
 *   "separator", "grid", or a per-match type, each with `payload.item`.
 *   Separator texts are looked up by key ("EVENTS_LIVE", "EVENTS_TO_COME",
 *   "CURRENT_FAVORITES", ...) through the helper imported from module 4.
 *   NOTE(review): modules 52 / 3 / 55 look like reselect / lodash / moment,
 *   but they are defined outside this chunk -- confirm.
 *
 * Module 837 (exported as `e.default`): a component class extending
 *   r["Component"] (presumably React -- confirm). On mount and on pathname
 *   change it calls `props.askRoute` with a route string starting from
 *   `l.a.LIVE` and suffixed with ":favorites", ":tournament:<id>",
 *   ":category:<id>" or ":sport:<id>"; `askRoute` dispatches
 *   `p.a.send({route: ...})`. It renders a loader, the item list, or the
 *   "EVENTS_NO" message.
 *
 * No match or odds data is embedded here: everything is read from
 * `state.entities` at runtime.
 *
 * Formatting note: line breaks are placed only where a newline is harmless.
 * A break directly after `return` triggers automatic semicolon insertion,
 * which makes the function return undefined or, before an anonymous
 * `function(...)`, makes the whole chunk a syntax error.
 */
(window.webpackJsonp=window.webpackJsonp||[]).push([[12],{696:function(t,e,n){"use strict";var r=n(55),o=n.n(r),a=n(52),i=n(3),u=n(4),c=n.n(u),s=n(10),f=n(275);function l(t){return function(t){if(Array.isArray(t)){for(var e=0,n=new Array(t.length);e<t.length;e++)n[e]=t[e];return n}}(t)||function(t){if(Symbol.iterator in Object(t)||"[object Arguments]"===Object.prototype.toString.call(t))return Array.from(t)}(t)||function(){throw new TypeError("Invalid attempt to spread non-iterable instance")}()}function p(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}var m=f.a.STANDARD,h=f.a.LIVE,v=f.a.FAVORITES,y=Object(a.createSelectorCreator)(a.defaultMemoize,s.j),d=[],E=function(t){return Object(a.createSelector)([function(t,e){return e.location.pathname},function(t,e){return e.match.params.sportId},function(t,e){return e.match.params.categoryId},function(t,e){return e.match.params.tournamentId},function(t,e){return e.match.params.hour}],function(e,n,r,o,a){return function(t){return function(e,n,r,o,a){return function(n){if("home"===t){if("live"===n)return{text:c()("EVENTS_LIVE"),type:h};if("edito"===n)return{text:c()("NEW_TITLE"),type:m};if("to_come"===n)return{text:c()("EVENTS_TO_COME"),type:m}}else if("live"===t){if("live"===n)return e.match(/favorites/)?{text:c()("CURRENT_FAVORITES"),type:v}:{text:c()("EVENTS_LIVE"),type:m};if("prematch"===n)return e.match(/favorites/)?{text:c()("TOCOME_FAVORITES"),type:v}:{text:c()("EVENTS_TO_LIVE"),type:m}}else if("tv"===t){if("live"===n)return e.match(/favorites/)?{text:c()("CURRENT_FAVORITES"),type:v}:{text:c()("EVENTS_LIVE"),type:h};if("prematch"===n)return e.match(/favorites/)?{text:c()("TOCOME_FAVORITES"),type:v}:{text:c()("EVENTS_TO_COME"),type:m}}else if("favorites"===t){if("live"===n)return{text:c()("CURRENT_FAVORITES"),type:v};if("prematch"===n)return{text:c()("TOCOME_FAVORITES"),type:v}}else
if("catalog"===t){if("live"===n)return{text:c()("EVENTS_LIVE"),type:m};if("prematch"===n)return{text:c()("EVENTS_TO_COME"),type:m}}else if("calendar"===t)return{text:c()("NEXT_HOURS",{count:s.h.castNumber(a)}),type:m};return{text:void 0,type:m}}}}(t)(e,n,r,o,a)})},O=function(t){return Object(a.createSelector)([function(t,e){return e.match.params.sportId},function(t,e){return e.match.params.categoryId},function(t,e){return e.match.params.tournamentId}],function(e,n,r){return function(t){return function(e,n,r){return function(o){return"home"===t?"live"===o.hlType?"live":"highlight":["tv","favorites"].indexOf(t)>-1?"LIVE"===o.status?"live":"highlight":"live"===t?"LIVE"===o.status?"live":"prematch":"catalog"===t?(void 0!==r?a="tournament":void 0!==n?a="category":void 0!==e&&(a="sport"),"LIVE"===o.status?"live":a):"highlight";var a}}}(t)(e,n,r)})},b=function(t){return Object(a.createSelector)([function(t){return t.entities},function(t,e){return e.match.params.sportId},function(t,e){return e.match.params.categoryId},function(t,e){return e.match.params.tournamentId},function(t,e){return e.match.params.hour},function(t,e){return e.location.pathname}],function(e,n,r,o,a,i){var u,c=t;return["live","tv","catalog"].indexOf(t)>-1?i.indexOf("favorites")>-1?c="favorites":void 0!==o?(c="tournaments",u=o):void 0!==r?(c="categories",u=r):void 0!==n&&(c="sports",u=n):"calendar"===t&&(c="calendar",u=a),u?e[c][u]:e[c]})},g=function(t){return Object(a.createSelector)([function(t,e){return e.match.params.sportId},function(t,e){return e.match.params.categoryId},function(t,e){return e.match.params.tournamentId},function(t,e){return e.location.pathname}],function(e,n,r,o){var a="matches";return"live"===t?o.indexOf("favorites")>-1?a="liveMatches":void 0===r&&void 0===n&&void 0===e||(a="liveMatches"):"tv"===t&&(o.indexOf("favorites")>-1?a="tvMatches":void 0===r&&void 0===n&&void 0===e||(a="tvMatches")),a})},T=function(t){return Object(a.createSelector)([b(t),g(t),function(t){
return t.entities.matches}],function(t,e,n){var r=t&&t[e]||d;return Object(i.map)(r,function(t){return n[t]})})},I=function(t){return y([T(t),function(t){return t.entities.sports},function(t){return t.entities.categories},function(t){return t.entities.tournaments}],function(t,e,n,r){return Object(i.reduce)(t,function(t,o){if(void 0!==o&&null!==o){var a=o.sportId,i=o.tournamentId,u=o.categoryId;if(99999!==a){var c,f=e[a];if(o.tvChannels&&(c=s.f.convertToLowerCase(o.tvChannels)),void 0!==f&&null!==f){var l=n[u];if(void 0!==l&&null!==l){var m=r[i];void 0!==m&&null!==m&&t.push(function(t){for(var e=1;e<arguments.length;e++){var n=null!=arguments[e]?arguments[e]:{},r=Object.keys(n);"function"===typeof Object.getOwnPropertySymbols&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(t){return Object.getOwnPropertyDescriptor(n,t).enumerable}))),r.forEach(function(e){p(t,e,n[e])})}return t}({},o,{tvChannels:c,sportName:f.sportName,categoryName:l.categoryName,tournamentName:m.tournamentName}))}}}}return t},[])})},S=function(t){return Object(a.createSelector)([b(t),g(t),I(t)],function(t,e,n){return{isLoading:void 0===t||t&&void 0===t[e],isNull:null===t||t&&null===t[e]||!1,matches:n}})};e.a=function(t){return function(){return y([S(t),O(t),E(t),function(t){return t.entities.grids}],function(e,n,r,a){var u=e.matches,s=e.isLoading,f=e.isNull;if(s||f)return{list:d,isEmpty:0===u.length,isLoading:s,isNull:f};var p={default:function(){return!0}};"home"===t?p={live:["hlType","live"],edito:["hlType","edito"],to_come:function(t){return t&&"live"!==t.hlType&&"edito"!==t.hlType}}:"live"!==t&&"tv"!==t&&"catalog"!==t&&"favorites"!==t||(p={live:["status","LIVE"],prematch:["status","PREMATCH"]});var h=Object(i.reduce)(p,function(e,s,f){var p=Object(i.filter)(u,s);return p.length>0?("catalog"===t&&"prematch"===f||e.push({type:"separator",payload:{item:r(f)}}),p.forEach(function(r,a,i){if("catalog"===t){var
u,s,f=i[a-1],l=o.a.unix(r.matchStart).format("l"),p=r.isOutright;a>0&&(u=o.a.unix(f.matchStart).format("l"),s=f.isOutright),"LIVE"!==r.status&&(r.isOutright?s!==p&&e.push({type:"separator",payload:{item:{text:c()("BETS_OUTRIGHTS"),type:m}}}):u!==l&&e.push({type:"separator",payload:{item:{text:o.a.unix(r.matchStart).format(c()("formats.dateShort")),type:m}}}))}e.push({type:n(r),payload:{item:r}})}),"home"===t&&"edito"===f&&a&&a.length&&e.push.apply(e,l(Object(i.map)(a,function(t){return{type:"grid",payload:{item:t}}})))):"home"===t&&0===p.length&&"edito"===f&&a&&a.length&&(e.push({type:"separator",payload:{item:{text:c()("NEW_TITLE"),type:m}}}),e.push.apply(e,l(Object(i.map)(a,function(t){return{type:"grid",payload:{item:t}}})))),e},[]);return{list:h,isEmpty:0===h.length,isLoading:s,isNull:f}})}}},837:function(t,e,n){"use strict";n.r(e);n(6);var r=n(0),o=n.n(r),a=n(12),i=n(4),u=n.n(i),c=n(277),s=n.n(c),f=n(274),l=n(111),p=n(35),m=n(696),h=n(712),v=n(171),y=n(698),d=n(694);function E(t){return(E="function"===typeof Symbol&&"symbol"===typeof Symbol.iterator?function(t){return typeof t}:function(t){return t&&"function"===typeof Symbol&&t.constructor===Symbol&&t!==Symbol.prototype?"symbol":typeof t})(t)}function O(t,e){for(var n=0;n<e.length;n++){var r=e[n];r.enumerable=r.enumerable||!1,r.configurable=!0,"value"in r&&(r.writable=!0),Object.defineProperty(t,r.key,r)}}function b(t,e){return!e||"object"!==E(e)&&"function"!==typeof e?function(t){if(void 0===t)throw new ReferenceError("this hasn't been initialised - super() hasn't been called");return t}(t):e}function g(t){return(g=Object.setPrototypeOf?Object.getPrototypeOf:function(t){return t.__proto__||Object.getPrototypeOf(t)})(t)}function T(t,e){return(T=Object.setPrototypeOf||function(t,e){return t.__proto__=e,t})(t,e)}var I=function(t){function e(){return function(t,e){if(!(t instanceof e))throw new TypeError("Cannot call a class as a function")}(this,e),b(this,g(e).apply(this,arguments))}var n,a,i;
return function(t,e){if("function"!==typeof e&&null!==e)throw new TypeError("Super expression must either be null or a function");t.prototype=Object.create(e&&e.prototype,{constructor:{value:t,writable:!0,configurable:!0}}),e&&T(t,e)}(e,r["Component"]),n=e,(a=[{key:"componentWillMount",value:function(){var t=this.props,e=t.location,n=t.match;this.dispatchRoute(n.params,e.pathname)}},{key:"componentWillReceiveProps",value:function(t){var e=t.match.params,n=t.location.pathname;this.props.location.pathname!==n&&this.dispatchRoute(e,n)}},{key:"dispatchRoute",value:function(t,e){var n=l.a.LIVE,r=t.sportId,o=t.categoryId,a=t.tournamentId;e.indexOf("favorites")>-1?n+=":favorites":void 0!==a?n+=":tournament:".concat(a):void 0!==o?n+=":category:".concat(o):void 0!==r&&(n+=":sport:".concat(r)),this.props.askRoute(n)}},{key:"render",value:function(){var t=this.props,e=t.isLoading,n=t.isNull,r=t.isEmpty,a=t.list;return e?o.a.createElement(s.a,{title:u()("DOCUMENT_TITLE.LIVE")},o.a.createElement(v.a,{loading:!0})):o.a.createElement(s.a,{title:u()("DOCUMENT_TITLE.LIVE")},o.a.createElement("section",{className:"event-list"},!n&&!r&&o.a.createElement(h.a,{items:a}),(n||r)&&o.a.createElement("p",{className:"no-results centered-message-betting"},u()("EVENTS_NO"))))}}])&&O(n.prototype,a),i&&O(n,i),e}();e.default=Object(f.a)(d.a,y.a,Object(a.connect)(function(){var t=Object(m.a)("live")();return function(e,n){return t(e,n)}},function(t){return{askRoute:function(e){t(p.a.send({route:e}))}}}))(I)}}]);

所以我只需要抓取这些数据,并以与网站几乎相同的格式返回(从最全局的数据到最精细的数据):

1 /包含所有可用信息,下注和赔率的现场游戏(包括所有运动项目)总数。在此列表中,我可以单独选择任何游戏,并查看可用的信息,赔率和下注。

2 /每种运动的现场比赛总数(例如:足球(8),篮球(6)等),并提供所有信息,下注和赔率

3 /每种运动的所有现场比赛/联赛的列表(例如:如果选择了足球,则返回英超,西班牙联赛,法国联赛1等等),并提供所有信息,下注和赔率

4 /在一项运动中选择了特定的比赛/联赛(例如英格兰超级联赛)时,显示其所有现场比赛的列表以及所有可用的信息,下注和赔率

5 /最后,在第4 /点的列表中,我需要能够像第1 /点那样单独选择任何一场现场比赛,以查看其可用的信息,赔率和下注

任何帮助,建议和想法将不胜感激

j.d

1 个答案:

答案 0 :(得分:0)

网站https://www.winamax.fr/paris-sportifs/live使用WebSocket定期获取“新”数据。

打开浏览器的开发者工具,导航至“网络”选项卡下的“WS”选项卡,并查看从服务器发送到客户端的数据帧。

尝试使用https://pypi.org/project/websocket_client/之类的WebSocket客户端,并将其指向您要抓取的站点。