| Schools {Lahman} | R Documentation |
Information on schools players attended, by school
data(Schools)
A data frame with 1207 observations on the following 5 variables.
schoolID: school ID code
name_full: school name
city: city where school is located
state: state where school's city is located
country: country where school is located
Lahman, S. (2016) Lahman's Baseball Database, 1871-2015, 2015 version, http://www.seanlahman.com/baseball-archive/statistics/
# Attach dplyr for the pipe and data-manipulation verbs used in the examples.
# library() stops with a clear error if dplyr is not installed, instead of
# returning FALSE and letting a later `%>%` call fail.
library(dplyr)

# How many different schools are listed in each state?
table(Schools$state)

# How many different schools are listed in each country?
table(Schools$country)
# Top 20 schools, ranked by the number of distinct players who attended

# School details to attach to the counts (everything except country)
schoolInfo <- Schools %>%
  select(-country)

# Count players per school. n_distinct(playerID) counts each player once per
# school even if that player has several rows for the same school, so the
# `players` column really is a count of players rather than of rows.
schoolCount <- CollegePlaying %>%
  group_by(schoolID) %>%
  summarise(players = n_distinct(playerID)) %>%
  left_join(schoolInfo, by = "schoolID") %>%
  arrange(desc(players))

head(schoolCount, 20)
# Roll the per-school counts up to state level: total players and the
# number of schools contributing to each state's total
schoolStates <- summarise(
  group_by(schoolCount, state),
  players = sum(players),
  schools = n()
)

# Inspect the structure and distribution of the state-level table
str(schoolStates)
summary(schoolStates)
## Not run:
# The zipcode package is optional: the block only runs when it can be attached.
if (require(zipcode)) {
  # in lieu of more precise geocoding via the school name (name_full),
  # approximate each school's position by the mean latitude/longitude of
  # the zip codes that share its city and state
  zips <- zipcode %>%
    group_by(city, state) %>%
    summarise(latitude = mean(latitude),
              longitude = mean(longitude)) %>%
    ungroup()
  str(zips)

  # merge lat/long from zips; all.x = TRUE keeps schools without a match
  schoolsXY <- merge(Schools, zips, by = c("city", "state"), all.x = TRUE)
  str(schoolsXY)

  # plot school locations, leaving out Hawaii
  # (the state column is named `state`, matching the merge key above)
  with(subset(schoolsXY, state != "HI"),
       plot(jitter(longitude), jitter(latitude)))
}
## End(Not run)