In [None]:
# Visos Lietuvos duomenys

## Stotelės

In [None]:
from io import BytesIO
import requests
import json
from zipfile import ZipFile
import os

import plotly.express as px
import pandas as pd
import datetime

# Source-data locations, resolved relative to the directory the notebook is run from.
working_directory = f'{os.getcwd()}/../data/saltiniai/vintra/'
gtfs_files_directory = os.path.join(working_directory, 'gtfs')

# Read the Mapbox token through a context manager so the file handle is closed
# deterministically, and strip surrounding whitespace: token files normally end
# with a newline, which must not become part of the token handed to plotly.
with open("../.mapbox_token") as mapbox_token_file:
    mapbox_access_token = mapbox_token_file.read().strip()
px.set_mapbox_access_token(mapbox_access_token)

# Latitude/longitude pair stored under the name `lithuania_center`.
lithuania_center = {'lat': 55.169438, 'lon': 23.881275}

# Load the municipality GeoJSON. The encoding is pinned to UTF-8 (the encoding
# JSON documents are exchanged in) instead of relying on the platform default,
# which would mis-decode non-ASCII municipality names on non-UTF-8 locales.
with open('../data/geojson/municipalities.geojson', 'r', encoding='utf-8') as municipalities_geojson_file:
    municipalities_geojson = json.load(municipalities_geojson_file)


# Read the GTFS tables used below directly from the combined archive.
with ZipFile(os.path.join(gtfs_files_directory, 'gtfs_all.zip')) as gtfs_zip:

    def _read_gtfs_table(member_name, **read_csv_options):
        """Parse one CSV member of the open GTFS archive into a DataFrame."""
        return pd.read_csv(gtfs_zip.open(member_name), **read_csv_options)

    stops_df = _read_gtfs_table("stops.txt")
    routes_df = _read_gtfs_table("routes.txt")
    trips_df = _read_gtfs_table("trips.txt")
    # The service period boundaries are parsed into datetimes for the date statistics.
    calendar_df = _read_gtfs_table("calendar.txt", parse_dates=['start_date', 'end_date'])
    agency_df = _read_gtfs_table("agency.txt")

# One row per trip, enriched with its service calendar, route and agency
# (default inner joins, so trips without a match in any table are dropped).
combined_df = (
    trips_df
    .merge(calendar_df, on='service_id')
    .merge(routes_df, on='route_id')
    .merge(agency_df, on='agency_id')
)

# Per-agency totals: distinct routes, distinct trips, and the earliest / mean /
# latest service end date. The result has two-level columns (column, aggregation).
total_routes_and_trips = (
    combined_df
    .groupby('agency_name')
    .agg({
        'route_id': 'nunique',
        'trip_id': 'nunique',
        'end_date': ['min', 'mean', 'max'],
    })
)

# The same distinct counts, restricted to trips whose service end date is later
# than 2022-06-06.
valid_routes_and_trips = (
    combined_df
    .loc[combined_df['end_date'] > '2022-06-06']
    .groupby('agency_name')
    .agg({'route_id': 'nunique', 'trip_id': 'nunique'})
    .rename(columns={'route_id': 'valid_routes', 'trip_id': 'valid_trips'})
)

# Order agencies by the NFKD-normalised form of their name, so that names with
# diacritics sort next to their base letters.
sorted_agencies = agency_df.iloc[agency_df['agency_name'].str.normalize('NFKD').argsort()]

# `total_routes_and_trips` carries two-level (column, aggregation) headers.
# Merging it with single-level frames is deprecated in older pandas and rejected
# with a MergeError in pandas 2.x, so a flattened copy is merged instead. The
# original frame is left untouched for any other cell that uses it.
total_stats_flat = total_routes_and_trips.copy()
total_stats_flat.columns = ['total_routes', 'total_trips', 'min_trip_date', 'mean_trip_date',
                            'max_trip_date']

# Left join: an agency with no valid trips must keep its totals (its valid_*
# counts stay missing here). An inner join would drop the agency altogether and
# its totals would then also end up missing in the final table.
agency_stats = total_stats_flat.merge(valid_routes_and_trips, how='left', left_index=True,
                                      right_index=True)

# Every agency from agency.txt is listed, including those without any trips.
combined_stats = sorted_agencies[['agency_name']].merge(
    agency_stats, how='left', left_on='agency_name', right_index=True)

# Kept as an explicit statement of the expected layout; the names already match.
combined_stats.columns = ['agency_name', 'total_routes', 'total_trips', 'min_trip_date', 'mean_trip_date',
                          'max_trip_date', 'valid_routes', 'valid_trips', ]


# Columns holding counts; agencies without matching rows have missing values
# there, which are reported as 0.
count_columns = ['total_routes', 'total_trips', 'valid_routes', 'valid_trips']

combined_stats = combined_stats.assign(
    # Fixed date stamped on every row of the export.
    date=datetime.date(2022, 5, 18),
    # Keep only the calendar date of the averaged end date.
    mean_trip_date=combined_stats['mean_trip_date'].dt.date,
    **{column: combined_stats[column].fillna(0).astype(int) for column in count_columns},
)

# Written relative to the current working directory, index included.
combined_stats.to_csv('lithuania-gtfs-stats.csv')

In [None]:
# Disabled check, kept for reference: when enabled it prints how often each
# `wheelchair_boarding` value occurs in stops.txt.
# print(stops_df['wheelchair_boarding'].value_counts())

```{admonition} Stotelių prieinamumo asmenims su judėjimo negalia informacijos nepateikimas
:class: warning
GTFS failuose nepateikiama informacija apie tai, ar stotelės prieinamos asmenims su judėjimo negalia (stops.txt failo `wheelchair_boarding` atributas).
```

In [None]:
# How many stops repeat the exact coordinates of an earlier stop (True)
# versus how many are the first occurrence of their coordinates (False).
(
    stops_df
    .duplicated(subset=['stop_lat', 'stop_lon'], keep='first')
    .value_counts()
)