-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpycon.py
More file actions
22 lines (17 loc) · 944 Bytes
/
pycon.py
File metadata and controls
22 lines (17 loc) · 944 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
from scrapy import Spider, Request
class PyconSpider(Spider):
    """Scrape the speaker list from the PyCon Pakistan site into a CSV file.

    Crawl flow: fetch the home page, locate the link under the
    ``speakers-2019`` menu entry, follow it, then write every speaker's
    name, designation and company to ``Speakers(Pycon2019).csv``.
    """

    name = "pycon"

    def __init__(self, *args, **kwargs):
        """Initialise the base spider and set the site root URL.

        Positional and keyword arguments are forwarded to ``Spider`` so
        the spider can still be constructed when extra arguments are
        supplied.
        """
        super(PyconSpider, self).__init__(*args, **kwargs)
        # Assigned after the base initialiser so the crawl always starts
        # from this address.
        self.url = 'http://pycon.pk'

    def start_requests(self):
        """Yield the initial request for the site's home page."""
        yield Request(url=self.url, callback=self.extract_speakerslink)

    def extract_speakerslink(self, response):
        """Find the speakers page link on the home page and request it.

        Yields nothing (after logging a warning) when the link is absent.
        """
        speakers_link = response.css(
            "li[id='speakers-2019'] a::attr(href)"
        ).extract_first()
        if not speakers_link:
            # Without this guard the crawl would request a bogus URL built
            # from the literal text "None".
            self.logger.warning(
                "Speakers link not found on %s", response.url
            )
            return
        # urljoin resolves relative and absolute hrefs against the page
        # URL; plain string concatenation only works for "/path" hrefs.
        yield Request(url=response.urljoin(speakers_link), callback=self.parse)

    def parse(self, response):
        """Extract speaker details and write them to a CSV file.

        Each ``div.member-desc`` element contributes one row made of the
        first ``h3``, ``h5`` and ``h6`` text nodes; a missing node is
        stored as ``None``.
        """
        # Imported here, as in the original, so pandas is only loaded
        # when this callback actually runs.
        from pandas import DataFrame

        speakers_info = response.css("div[class='member-desc']")
        rows = [
            [
                info.css("h3::text").extract_first(),
                info.css("h5::text").extract_first(),
                info.css("h6::text").extract_first(),
            ]
            for info in speakers_info
        ]
        df = DataFrame(rows, columns=["Name", "Designation", "Company"])
        df.to_csv("Speakers(Pycon2019).csv", index=False, sep=",")