Artur Chukhrai for SerpApi

Posted on Nov 15, 2022 • Edited on Feb 6, 2023 • Originally published at serpapi.com

Scrape Google Play Children (Kids) with Python

#webscraping #tutorial #python #programming

What will be scraped
Full Code
Preparation
Code Explanation
Using Google Play Apps Store API from SerpApi
Links

What will be scraped

📌Note: Google Play gives different results for logged in and not logged in users.

Full Code

If you don't need explanation, have a look at full code example in the online IDE.

import time, json
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from parsel import Selector

google_play_children = {}


def scroll_page(url):
    """Open ``url`` in headless Chrome, scroll to the bottom, and parse the page.

    The page is scrolled repeatedly until the height of the ``.T4LgNb``
    container stops changing between two consecutive scrolls.

    Args:
        url: address of the page to load.

    Returns:
        A parsel ``Selector`` built from the page source after scrolling.
    """
    # JavaScript that reports the current height of the scrollable container.
    get_height_script = "return document.querySelector('.T4LgNb').scrollHeight;"

    service = Service(ChromeDriverManager().install())

    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    options.add_argument("--lang=en")
    options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36")
    options.add_argument("--no-sandbox")

    driver = webdriver.Chrome(service=service, options=options)

    try:
        driver.get(url)

        old_height = driver.execute_script(get_height_script)

        while True:
            driver.execute_script("window.scrollTo(0, document.querySelector('.T4LgNb').scrollHeight)")
            # Give the page a moment to load the next batch of content.
            time.sleep(1)

            new_height = driver.execute_script(get_height_script)

            # Height unchanged after a scroll: nothing more was loaded.
            if new_height == old_height:
                break

            old_height = new_height

        selector = Selector(driver.page_source)
    finally:
        # Always close the browser, even if loading or a script call fails,
        # so no Chrome process is left running.
        driver.quit()

    return selector


def scrape_new_and_updated(selector):
    """Extract every item of the "New & Updated" section.

    The results are stored as a list of dictionaries under the
    'New & Updated' key of the module-level ``google_play_children`` dict.
    Fields that cannot be found on an item are stored as ``None``
    (``'free'`` for a missing price) instead of raising.

    Args:
        selector: selector of the "New & Updated" section.
    """
    google_play_children['New & Updated'] = []

    for item in selector.css('.TAQqTe'):
        title = item.css('.OnEJge::text').get()

        # get() returns None when nothing matches, so guard the concatenation.
        href = item.css('::attr(href)').get()
        link = 'https://play.google.com' + href if href is not None else None

        # The category is the last matching label; the list may be empty.
        labels = item.css('.ubGTjb > .w2kbF::text').getall()
        category = labels[-1] if labels else None

        rating = item.css('.ubGTjb div .w2kbF::text').get()
        rating = float(rating) if rating else rating

        # Drops the first character before converting
        # (presumably a currency symbol such as "$" - verify for other locales).
        price = item.css('.ePXqnb::text').get()
        price = float(price[1:]) if price else 'free'

        thumbnail = item.css('.j2FCNc img::attr(srcset)').get()
        thumbnail = thumbnail.replace(' 2x', '') if thumbnail else thumbnail

        screenshot_image = item.css('.Vc0mnc img::attr(src), .jpDEN::attr(src)').get()
        video = item.css('.XdjT2b::attr(data-video-url)').get()

        google_play_children['New & Updated'].append({
            'title': title,
            'link': link,
            'category': category,
            'rating': rating,
            'price': price,
            'thumbnail': thumbnail,
            'screenshot_image': screenshot_image,
            'video': video
        })


def scrape_all_sections(selector):
    """Extract the items of every titled section and print them as JSON.

    Each section title becomes a key of the module-level
    ``google_play_children`` dict holding a list of item dictionaries.
    The "New & Updated" section is delegated to ``scrape_new_and_updated``.

    Args:
        selector: selector of the whole page.
    """
    for section in selector.css('section'):
        section_title = section.css('.kcen6d span::text').get()

        if section_title is None:
            # Section without a title: nothing to store it under.
            continue
        elif section_title == 'New & Updated':
            scrape_new_and_updated(section)
            continue

        google_play_children[section_title] = []

        for item in section.css('.UVEnyf'):
            title = item.css('.Epkrse::text').get()

            # get() returns None when nothing matches, so guard the concatenation.
            href = item.css('.Si6A0c::attr(href)').get()
            link = 'https://play.google.com' + href if href is not None else None

            rating = item.css('.LrNMN:nth-child(1)::text').get()
            rating = float(rating) if rating else rating

            # Drops the first character before converting
            # (presumably a currency symbol such as "$" - verify for other locales).
            price = item.css('.VixbEe span::text').get()
            price = float(price[1:]) if price else 'free'

            thumbnail = item.css('.etjhNc::attr(srcset)').get()
            thumbnail = thumbnail.replace(' 2x', '') if thumbnail else thumbnail

            google_play_children[section_title].append({
                'title': title,
                'link': link,
                'rating': rating,
                'price': price,
                'thumbnail': thumbnail
            })

    print(json.dumps(google_play_children, indent=2, ensure_ascii=False))


def scrape_google_play_children():
    """Build the Google Play "FAMILY" category URL, scroll it, and scrape it."""
    params = {
        'hl': 'en_GB',              # language
        'gl': 'US',                 # country of the search
        'age': None                 # AGE_RANGE1 = up to 5 years, AGE_RANGE2 = 6-8 years, AGE_RANGE3 = 9-12 years
    }
    age_ranges = ('AGE_RANGE1', 'AGE_RANGE2', 'AGE_RANGE3')

    # The "age" parameter is added to the URL only for a recognised range.
    if params['age'] not in age_ranges:
        target_url = f"https://play.google.com/store/apps/category/FAMILY?hl={params['hl']}&gl={params['gl']}"
    else:
        target_url = f"https://play.google.com/store/apps/category/FAMILY?age={params['age']}&hl={params['hl']}&gl={params['gl']}"

    page = scroll_page(target_url)
    scrape_all_sections(page)


if __name__ == "__main__":
    scrape_google_play_children()

Preparation

Install libraries:

pip install parsel selenium webdriver webdriver_manager

Reduce the chance of being blocked

Make sure you're passing a user-agent in the request headers so that the visit looks like it comes from a "real" user. The default requests user-agent is python-requests, so websites understand that the request is most likely sent by a script. Check what's your user-agent.

There's a how to reduce the chance of being blocked while web scraping blog post that can get you familiar with basic and more advanced approaches.

Code Explanation

Import libraries:

import time, json
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from parsel import Selector

Library	Purpose
`time`	to work with time in Python.
`json`	to convert extracted data to a JSON object.
`webdriver`	to drive a browser natively, as a user would, either locally or on a remote machine using the Selenium server.
`Service`	to manage the starting and stopping of the ChromeDriver.
`Selector`	XML/HTML parser that has full XPath and CSS selectors support.

Define the dictionary in which all the extracted data will be stored:

google_play_children = {}

Top-level code environment

At the beginning of the function, parameters are defined for generating the URL. If you want to pass other parameters to the URL, you can do so using the params dictionary. The parameters affect the output results:

params = {
    'hl': 'en_GB',              # language 
    'gl': 'US',                 # country of the search
    'age': None                 # category. AGE_RANGE1 = up to 5 years, AGE_RANGE2 = 6-8 years, AGE_RANGE3 = 9-12 years
}

I want to draw your attention to the fact that by clicking on different categories, different links are formed. This is illustrated more clearly in the GIF below:

In order for the code to work correctly with each of the categories, it was decided to create a condition according to which the corresponding link will be formed:

if params['age'] in ['AGE_RANGE1', 'AGE_RANGE2', 'AGE_RANGE3']:
    URL = f"https://play.google.com/store/apps/category/FAMILY?age={params['age']}&hl={params['hl']}&gl={params['gl']}"
else:
    URL = f"https://play.google.com/store/apps/category/FAMILY?hl={params['hl']}&gl={params['gl']}"

Category	The value of the "age" parameter
All ages up to 12	`None`
Ages up to 5	`AGE_RANGE1`
Ages 6-8	`AGE_RANGE2`
Ages 9-12	`AGE_RANGE3`

Next, the URL is passed to the scroll_page(URL) function to scroll the page and get all data. The result that this function returns is passed to the scrape_all_sections(result) function to extract the necessary data. The explanation of these functions will be in the corresponding headings below.

result = scroll_page(URL)
scrape_all_sections(result)

This code uses the generally accepted rule of using the __name__ == "__main__" construct:

def scrape_google_play_children():
    """Build the Google Play "FAMILY" category URL, scroll it, and scrape it."""
    params = {
        'hl': 'en_GB',              # language
        'gl': 'US',                 # country of the search
        'age': None                 # AGE_RANGE1 = up to 5 years, AGE_RANGE2 = 6-8 years, AGE_RANGE3 = 9-12 years
    }
    age_ranges = ('AGE_RANGE1', 'AGE_RANGE2', 'AGE_RANGE3')

    # The "age" parameter is added to the URL only for a recognised range.
    if params['age'] not in age_ranges:
        target_url = f"https://play.google.com/store/apps/category/FAMILY?hl={params['hl']}&gl={params['gl']}"
    else:
        target_url = f"https://play.google.com/store/apps/category/FAMILY?age={params['age']}&hl={params['hl']}&gl={params['gl']}"

    page = scroll_page(target_url)
    scrape_all_sections(page)


if __name__ == "__main__":
    scrape_google_play_children()

The code under this check runs only when the user runs this file directly. If the user imports this file into another one, the condition is false and scrape_google_play_children() is not called automatically.

You can watch the video Python Tutorial: if name == 'main' for more details.

Scroll page

The function takes the URL and returns a full HTML structure.

First, let's understand how pagination works on the Google Play Children page. Data does not load immediately. If the user needs more data, they simply scroll the page and the site downloads a small batch of data.

In this case, the selenium library is used, which allows you to simulate user actions in the browser. For selenium to work, you need ChromeDriver, which can be downloaded manually or using code. In our case, the second method is used: ChromeDriverManager().install() downloads the driver binary under the hood, and Service controls the start and stop of ChromeDriver:

service = Service(ChromeDriverManager().install())

You should also add options to work correctly:

options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument('--lang=en')
options.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36')
options.add_argument('--no-sandbox')

Chrome options	Explanation
`--headless`	to run Chrome in headless mode.
`--lang=en`	to set the browser language to English.
`user-agent`	to act as a "real" user request from the browser by passing it to request headers. Check what's your `user-agent`.
`--no-sandbox`	to make chromedriver work properly on different machines.

Now we can start webdriver and pass the url to the get() method.

driver = webdriver.Chrome(service=service, options=options)
driver.get(url)

The page scrolling algorithm looks like this:

Find out the initial page height and write the result to the old_height variable.
Scroll the page using the script and wait 1 second for the data to load.
Find out the new page height and write the result to the new_height variable.
If the variables new_height and old_height are equal, then we complete the algorithm, otherwise we write the value of the variable new_height to the variable old_height and return to step 2.

Getting the page height and scroll is done by pasting the JavaScript code into the execute_script() method.

# 1 step
old_height = driver.execute_script("""
    function getHeight() {
        return document.querySelector('.T4LgNb').scrollHeight;
    }
    return getHeight();
""")

while True:
    # 2 step
    driver.execute_script("window.scrollTo(0, document.querySelector('.T4LgNb').scrollHeight)")
    time.sleep(1)

    # 3 step
    new_height = driver.execute_script("""
        function getHeight() {
            return document.querySelector('.T4LgNb').scrollHeight;
        }
        return getHeight();
    """)

    # 4 step
    if new_height == old_height:
        break

    old_height = new_height

Now we need to process the HTML using the Selector class from the Parsel package, to which we pass the HTML structure with all the data that was received after scrolling the page. This is necessary to successfully retrieve data in the next function. After all the operations are done, stop the driver:

selector = Selector(driver.page_source)
driver.quit()

The function looks like this:

def scroll_page(url):
    """Open ``url`` in headless Chrome, scroll to the bottom, and parse the page.

    The page is scrolled repeatedly until the height of the ``.T4LgNb``
    container stops changing between two consecutive scrolls.

    Args:
        url: address of the page to load.

    Returns:
        A parsel ``Selector`` built from the page source after scrolling.
    """
    # JavaScript that reports the current height of the scrollable container.
    get_height_script = "return document.querySelector('.T4LgNb').scrollHeight;"

    service = Service(ChromeDriverManager().install())

    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    options.add_argument("--lang=en")
    options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36")
    options.add_argument("--no-sandbox")

    driver = webdriver.Chrome(service=service, options=options)

    try:
        driver.get(url)

        old_height = driver.execute_script(get_height_script)

        while True:
            driver.execute_script("window.scrollTo(0, document.querySelector('.T4LgNb').scrollHeight)")
            # Give the page a moment to load the next batch of content.
            time.sleep(1)

            new_height = driver.execute_script(get_height_script)

            # Height unchanged after a scroll: nothing more was loaded.
            if new_height == old_height:
                break

            old_height = new_height

        selector = Selector(driver.page_source)
    finally:
        # Always close the browser, even if loading or a script call fails,
        # so no Chrome process is left running.
        driver.quit()

    return selector

In the gif below, I demonstrate how this function works:

Scrape all sections

This function takes a full HTML structure and prints all results in JSON format.

To retrieve data from all sections, you need to find the selector that matches each section, and then iterate over the sections in a loop:

for section in selector.css('section'):
    # data extraction will be here

It is necessary to extract the section_title and make it a key in the google_play_children dictionary, where a list of extracted data for each item will be added later.

Pay attention to check the value of the section_title variable:

The first condition is used because sometimes an empty section with no values is retrieved.
The second condition is used to retrieve data from the special section. This is the New & Updated section, where data is retrieved differently and for this the scrape_new_and_updated(section) function is used, which will be discussed in the corresponding heading.

section_title = section.css('.kcen6d span::text').get()

if section_title is None:
    continue
elif section_title == 'New & Updated':
    scrape_new_and_updated(section)
    continue

google_play_children[section_title] = []

To extract the necessary data, you need to find the selector where they are located. In our case, this is the .UVEnyf selector, which contains all items. You need to iterate each item in the loop:

for item in section.css('.UVEnyf'):
    # data extraction will be here

For each item, data such as title, link, rating, price and thumbnail are extracted. You need to find the matching selector and get the text or attribute value. I want to additionally note that the thumbnail is retrieved from the srcset attribute, where it is of better quality:

title = item.css('.Epkrse::text').get()
link = 'https://play.google.com' + item.css('.Si6A0c::attr(href)').get()
rating = item.css('.LrNMN:nth-child(1)::text').get()
rating = float(rating) if rating else rating
price = item.css('.VixbEe span::text').get()
price = float(price[1:]) if price else 'free'
thumbnail = item.css('.etjhNc::attr(srcset)').get()
thumbnail = thumbnail.replace(' 2x', '') if thumbnail else thumbnail

📌Note: When extracting the rating, price and thumbnail, a ternary expression is used which handles the values of these data, if any are available.

After extracting the data, they are appended to the google_play_children dictionary by the corresponding key:

google_play_children[section_title].append({
    'title': title,
    'link': link,
    'rating': rating,
    'price': price,
    'thumbnail': thumbnail
})

At the end of the function, the google_play_children dictionary is printed in JSON format using the json.dumps() function:

print(json.dumps(google_play_children, indent=2, ensure_ascii=False))

The complete function to scrape all sections would look like this:

def scrape_all_sections(selector):
    """Extract the items of every titled section and print them as JSON.

    Each section title becomes a key of the module-level
    ``google_play_children`` dict holding a list of item dictionaries.
    The "New & Updated" section is delegated to ``scrape_new_and_updated``.

    Args:
        selector: selector of the whole page.
    """
    for section in selector.css('section'):
        section_title = section.css('.kcen6d span::text').get()

        if section_title is None:
            # Section without a title: nothing to store it under.
            continue
        elif section_title == 'New & Updated':
            scrape_new_and_updated(section)
            continue

        google_play_children[section_title] = []

        for item in section.css('.UVEnyf'):
            title = item.css('.Epkrse::text').get()

            # get() returns None when nothing matches, so guard the concatenation.
            href = item.css('.Si6A0c::attr(href)').get()
            link = 'https://play.google.com' + href if href is not None else None

            rating = item.css('.LrNMN:nth-child(1)::text').get()
            rating = float(rating) if rating else rating

            # Drops the first character before converting
            # (presumably a currency symbol such as "$" - verify for other locales).
            price = item.css('.VixbEe span::text').get()
            price = float(price[1:]) if price else 'free'

            thumbnail = item.css('.etjhNc::attr(srcset)').get()
            thumbnail = thumbnail.replace(' 2x', '') if thumbnail else thumbnail

            google_play_children[section_title].append({
                'title': title,
                'link': link,
                'rating': rating,
                'price': price,
                'thumbnail': thumbnail
            })

    print(json.dumps(google_play_children, indent=2, ensure_ascii=False))

Code	Explanation
`css()`	to access elements by the passed selector.
`::text` or `::attr(<attribute>)`	to extract textual or attribute data from the node.
`get()`	to actually extract the textual data.
`float()`	to make a floating number from a string value.
`replace()`	to replace all occurrences of the old substring with the new one without extra elements.

Scrape New & Updated section

This function accepts the New & Updated section selector and writes the results to the google_play_children dictionary by the corresponding key.

The New & Updated section does not always appear. It all depends on the passed parameters and the selected category. Accordingly, inside this function, you need to create the corresponding key in the google_play_children dictionary and declare a list in it:

google_play_children['New & Updated'] = []

Now we can start extracting data. This section has a certain number of items, which also need to be iterated in a loop using the .TAQqTe selector:

for item in selector.css('.TAQqTe'):
    # data extraction will be here

The difference in data extraction in this function is that data such as category, screenshot_image and video are additionally extracted here. Data is also retrieved by other selectors:

title = item.css('.OnEJge::text').get()
link = 'https://play.google.com' + item.css('::attr(href)').get()
category = item.css('.ubGTjb > .w2kbF::text').getall()[-1]
rating = item.css('.ubGTjb div .w2kbF::text').get()
rating = float(rating) if rating else rating
price = item.css('.ePXqnb::text').get()
price = float(price[1:]) if price else 'free'
thumbnail = item.css('.j2FCNc img::attr(srcset)').get().replace(' 2x', '')
screenshot_image = item.css('.Vc0mnc img::attr(src), .jpDEN::attr(src)').get()
video = item.css('.XdjT2b::attr(data-video-url)').get()

After extracting the data, they are appended to the google_play_children dictionary by the 'New & Updated' key:

google_play_children['New & Updated'].append({
    'title': title,
    'link': link,
    'category': category,
    'rating': rating,
    'price': price,
    'thumbnail': thumbnail,
    'screenshot_image': screenshot_image,
    'video': video
})

The complete function to scrape New & Updated section would look like this:

def scrape_new_and_updated(selector):
    """Extract every item of the "New & Updated" section.

    The results are stored as a list of dictionaries under the
    'New & Updated' key of the module-level ``google_play_children`` dict.
    Fields that cannot be found on an item are stored as ``None``
    (``'free'`` for a missing price) instead of raising.

    Args:
        selector: selector of the "New & Updated" section.
    """
    google_play_children['New & Updated'] = []

    for item in selector.css('.TAQqTe'):
        title = item.css('.OnEJge::text').get()

        # get() returns None when nothing matches, so guard the concatenation.
        href = item.css('::attr(href)').get()
        link = 'https://play.google.com' + href if href is not None else None

        # The category is the last matching label; the list may be empty.
        labels = item.css('.ubGTjb > .w2kbF::text').getall()
        category = labels[-1] if labels else None

        rating = item.css('.ubGTjb div .w2kbF::text').get()
        rating = float(rating) if rating else rating

        # Drops the first character before converting
        # (presumably a currency symbol such as "$" - verify for other locales).
        price = item.css('.ePXqnb::text').get()
        price = float(price[1:]) if price else 'free'

        thumbnail = item.css('.j2FCNc img::attr(srcset)').get()
        thumbnail = thumbnail.replace(' 2x', '') if thumbnail else thumbnail

        screenshot_image = item.css('.Vc0mnc img::attr(src), .jpDEN::attr(src)').get()
        video = item.css('.XdjT2b::attr(data-video-url)').get()

        google_play_children['New & Updated'].append({
            'title': title,
            'link': link,
            'category': category,
            'rating': rating,
            'price': price,
            'thumbnail': thumbnail,
            'screenshot_image': screenshot_image,
            'video': video
        })

Output

Output for scrape_google_play_children() function:

{
  "New & Updated": [
    {
      "title": "PBS KIDS Video",
      "link": "https://play.google.com/store/apps/details?id=org.pbskids.video",
      "category": "Education",
      "rating": 4.4,
      "price": "free",
      "thumbnail": "https://play-lh.googleusercontent.com/EvR3uJ27B4SwWfK3LfwFrt0Sy3byTrmLHVGRndgJuf8rcvKMRyREhaUcXehQUler5w=s128-rw",
      "screenshot_image": "https://play-lh.googleusercontent.com/9MSE2M5sGVy73d75bBemSfZQicBp1cOkjjG-c3tvW5vOVrpOaXdAyjmnbVcBCMWSaLk=w416-h235-rw",
      "video": null
    },
    ... other items
  ],
  "Encourage kindness": [
    {
      "title": "Breathe, Think, Do with Sesame",
      "link": "https://play.google.com/store/apps/details?id=air.com.sesameworkshop.ResilienceThinkBreathDo",
      "rating": 4.0,
      "price": "free",
      "thumbnail": "https://play-lh.googleusercontent.com/-UbCkW4xbM661t4mndTi7owhXY0GYBCRQn4Pxl7_1tXgCCvqKsJwUKE-O61NO0CuJA=s512-rw"
    },
    ... other items
  ],
  ... other sections
  "Fan favorites": [
    {
      "title": "Polar Bear Horizon",
      "link": "https://play.google.com/store/apps/details?id=com.oceanhouse_media.booksmpolarbear_app",
      "rating": 4.7,
      "price": 2.99,
      "thumbnail": "https://play-lh.googleusercontent.com/hnupPIokoza2NOF3faGTeNImvFY2Lw_0aeSwi9a-_yG6e0GpixCgOZ_vNEPwwnXHMSw=s512-rw"
    },
    ... other items
  ]
}

Using Google Play Apps Store API from SerpApi

This section is to show the comparison between the DIY solution and our solution.

The main difference is that it's a quicker approach. Google Play Apps Store API will bypass blocks from search engines and you don't have to create the parser from scratch and maintain it.

First, we need to install google-search-results:

pip install google-search-results

Import the necessary libraries for work:

from serpapi import GoogleSearch
import os, json

Next, we write a search query and the necessary parameters for making a request:

params = {
    # https://docs.python.org/3/library/os.html#os.getenv
    'api_key': os.getenv('API_KEY'),    # your serpapi api
    'engine': 'google_play',            # SerpApi search engine
    'store': 'apps',                    # Google Play store
    'apps_category': 'FAMILY'           # parameter defines the apps and games store category. In this case we use 'FAMILY' to scrape Google Play Children apps
}

The GIF below shows where to select this category in the playground:

We then create a search object where the data is retrieved from the SerpApi backend. In the result_dict dictionary we get data from JSON:

search = GoogleSearch(params)
result_dict = search.get_dict()

The data is retrieved quite simply, we just need to turn to the corresponding key. All sections with required data are in the 'organic_results' key, so you need to iterate over them. For each section, we extract the section_title and make it a key in the google_play_children dictionary, where a list of extracted data for each item will be added later:

google_play_children = {}

for result in result_dict['organic_results']:
    section_title = result['title']
    google_play_children[section_title] = []

The 'items' key contains data about each item in this section. Therefore, it also needs to be iterated in a loop. To get the data, you need to refer to the corresponding key.

Sometimes, data such as rating, price, extracted_price, category or video is missing.

Therefore, the dict.get() method was used to get it, which by default returns None if there is no data. This will look much better than exception handling which is also used to prevent errors but makes the code less readable and more cumbersome:

# Collect the fields of every item in the current section.
for item in result['items']:
    google_play_children[section_title].append({
        'title': item['title'],
        'link': item['link'],
        'product_id': item['product_id'],
        'serpapi_link': item['serpapi_link'],
        'rating': item.get('rating'),
        'price': item.get('price', 'free'),
        'extracted_price': item.get('extracted_price', 0),
        'category': item.get('category'),
        'video': item.get('video'),
        # Read the thumbnail from its own key rather than repeating the title.
        'thumbnail': item.get('thumbnail'),
    })

The dict.get(keyname, value) method can be used to retrieve all data from the dictionary, but I decided to demonstrate possible errors in getting data that a user might encounter.

Example code to integrate:

from serpapi import GoogleSearch
import os, json

params = {
    # https://docs.python.org/3/library/os.html#os.getenv
    'api_key': os.getenv('API_KEY'),    # your serpapi api
    'engine': 'google_play',            # SerpApi search engine
    'store': 'apps',                    # Google Play store
    'apps_category': 'FAMILY'           # parameter defines the apps and games store category. In this case we use 'FAMILY' to scrape Google Play Children apps
}

search = GoogleSearch(params)           # where data extraction happens on the SerpApi backend
result_dict = search.get_dict()         # JSON -> Python dict

# Maps each section title to the list of items extracted from that section.
google_play_children = {}

for result in result_dict['organic_results']:
    section_title = result['title']
    google_play_children[section_title] = []

    for item in result['items']:
        google_play_children[section_title].append({
            'title': item['title'],
            'link': item['link'],
            'product_id': item['product_id'],
            'serpapi_link': item['serpapi_link'],
            'rating': item.get('rating'),
            'price': item.get('price', 'free'),
            'extracted_price': item.get('extracted_price', 0),
            'category': item.get('category'),
            'video': item.get('video'),
            # Read the thumbnail from its own key rather than repeating the title.
            'thumbnail': item.get('thumbnail'),
        })

print(json.dumps(google_play_children, indent=2, ensure_ascii=False))

Output:

{
  "New & updated": [
    {
      "title": "PBS KIDS Video",
      "link": "https://play.google.com/store/apps/details?id=org.pbskids.video",
      "product_id": "org.pbskids.video",
      "serpapi_link": "https://serpapi.com/search.json?engine=google_play_product&gl=us&hl=en&product_id=org.pbskids.video&store=apps",
      "rating": 4.4,
      "price": "free",
      "extracted_price": 0,
      "category": "Education",
      "video": null,
      "thumbnail": "PBS KIDS Video"
    },
    ... other items
    {
      "title": "Little Panda's Toy Adventure",
      "link": "https://play.google.com/store/apps/details?id=com.sinyee.babybus.spliceII",
      "product_id": "com.sinyee.babybus.spliceII",
      "serpapi_link": "https://serpapi.com/search.json?engine=google_play_product&gl=us&hl=en&product_id=com.sinyee.babybus.spliceII&store=apps",
      "rating": 3.3,
      "price": "free",
      "extracted_price": 0,
      "category": "Educational",
      "video": "https://play.google.com/video/lava/web/player/yt:movie:hEEnvV2nOCI?autoplay=1&embed=play",
      "thumbnail": "Little Panda's Toy Adventure"
    }
  ],
  ... other sections
  "Recommended for you": [
    {
      "title": "Toca Kitchen 2",
      "link": "https://play.google.com/store/apps/details?id=com.tocaboca.tocakitchen2",
      "product_id": "com.tocaboca.tocakitchen2",
      "serpapi_link": "https://serpapi.com/search.json?engine=google_play_product&gl=us&hl=en&product_id=com.tocaboca.tocakitchen2&store=apps",
      "rating": 4.1,
      "price": "free",
      "extracted_price": 0,
      "category": null,
      "video": null,
      "thumbnail": "Toca Kitchen 2"
    },
    ... other items
    {
      "title": "Math Kids: Math Games For Kids",
      "link": "https://play.google.com/store/apps/details?id=com.rvappstudios.math.kids.counting",
      "product_id": "com.rvappstudios.math.kids.counting",
      "serpapi_link": "https://serpapi.com/search.json?engine=google_play_product&gl=us&hl=en&product_id=com.rvappstudios.math.kids.counting&store=apps",
      "rating": 4.0,
      "price": "free",
      "extracted_price": 0,
      "category": null,
      "video": null,
      "thumbnail": "Math Kids: Math Games For Kids"
    }
  ]
}

Links

Join us on Twitter | YouTube

Add a Feature Request💫 or a Bug🐞

DEV Community

Scrape Google Play Children (Kids) with Python

What will be scraped

Full Code

Preparation

Code Explanation

Top-level code environment

Scroll page

Scrape all sections

Scrape New & Updated section

Output

Using Google Play Apps Store API from SerpApi

Links

Top comments (0)

Read next

Our Scientific Approach to Aligning Human Capacity with Business Objectives

🚀 Building a User Management API with FastAPI and SQLite

ChatsAPI — The World’s Fastest AI Agent Framework

How to create Subscribe to Newsletter with Reactjs and Supabase