Skip to content

Commit 55f53a4

Browse files
committed
bug introduced by update to cwf function. closes #47
1 parent bfacc28 commit 55f53a4

File tree

8 files changed

+130
-27
lines changed

8 files changed

+130
-27
lines changed

.Rbuildignore

+2
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,5 @@ cookies.txt$
1010
\.Rhistory$
1111
^.*\.Rproj$
1212
^\.Rproj\.user$
13+
.github
14+
_pkgdown.yml

.github/workflows/check-standard.yaml

+50
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
2+
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
3+
on:
4+
push:
5+
branches: [main, master]
6+
pull_request:
7+
branches: [main, master]
8+
9+
name: R-CMD-check
10+
11+
permissions: read-all
12+
13+
jobs:
14+
R-CMD-check:
15+
runs-on: ${{ matrix.config.os }}
16+
17+
name: ${{ matrix.config.os }} (${{ matrix.config.r }})
18+
19+
strategy:
20+
fail-fast: false
21+
matrix:
22+
config:
23+
- {os: macos-latest, r: 'release'}
24+
- {os: windows-latest, r: 'release'}
25+
- {os: ubuntu-latest, r: 'release'}
26+
27+
env:
28+
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
29+
R_KEEP_PKG_SOURCE: yes
30+
31+
steps:
32+
- uses: actions/checkout@v4
33+
34+
- uses: r-lib/actions/setup-pandoc@v2
35+
36+
- uses: r-lib/actions/setup-r@v2
37+
with:
38+
r-version: ${{ matrix.config.r }}
39+
http-user-agent: ${{ matrix.config.http-user-agent }}
40+
use-public-rspm: true
41+
42+
- uses: r-lib/actions/setup-r-dependencies@v2
43+
with:
44+
extra-packages: any::rcmdcheck
45+
needs: check
46+
47+
- uses: r-lib/actions/check-r-package@v2
48+
with:
49+
upload-snapshots: true
50+
build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")'

.gitignore

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
.Rproj.user
2+
.Rhistory
3+
.RData
4+
.Ruserdata

.travis.yml

-9
This file was deleted.

DESCRIPTION

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
Package: psidR
22
Type: Package
33
Title: Build Panel Data Sets from PSID Raw Data
4-
Version: 2.1
5-
Date: 2021-05-06
4+
Version: 2.2
5+
Date: 2024-05-29
66
Author: Florian Oswald
77
Maintainer: Florian Oswald <[email protected]>
88
Description: Makes it easy to build panel data in wide format from Panel Survey
@@ -31,4 +31,4 @@ Collate:
3131
'psidR-package.r'
3232
Suggests:
3333
testthat
34-
RoxygenNote: 7.1.1
34+
RoxygenNote: 7.2.3

R/build.panel.r

+24-6
Original file line numberDiff line numberDiff line change
@@ -336,13 +336,31 @@ build.panel <- function(datadir=NULL,fam.vars,ind.vars=NULL,heads.only=FALSE,cur
336336
# convert fam.vars to data.table
337337
stopifnot(is.data.frame(fam.vars))
338338
fam.vars <- data.table(fam.vars)
339+
340+
if (!all(c("year") %in% names(fam.vars))){
341+
stop("your fam.vars needs to contain column `year`")
342+
}
343+
344+
nlist = grepl(".year|.variable", names(fam.vars))
345+
if (any(nlist)){
346+
flog.error("your `fam.vars` contains illegal names ", names(fam.vars)[nlist], capture = TRUE)
347+
stop()
348+
}
339349
fam.vars <- copy(fam.vars[,lapply(.SD,make.char)])
340350
setkey(fam.vars,year)
341351

342352
# convert ind.vars to data.table if not null
343353
if (!is.null(ind.vars)){
344354
stopifnot(is.data.frame(ind.vars))
355+
if (!all(c("year") %in% names(ind.vars))){
356+
stop("your ind.vars needs to contain columns `year`")
357+
}
345358
ind.vars <- data.table(ind.vars)
359+
nlist = grepl(".year|.variable", names(ind.vars))
360+
if (any(nlist)){
361+
flog.error("your `ind.vars` contains illegal names ", names(ind.vars)[nlist], capture = TRUE)
362+
stop()
363+
}
346364
ind.vars <- copy(ind.vars[,lapply(.SD,make.char)])
347365
setkey(ind.vars,year)
348366
}
@@ -515,7 +533,7 @@ build.panel <- function(datadir=NULL,fam.vars,ind.vars=NULL,heads.only=FALSE,cur
515533
tmp <- data.table(tmp)
516534

517535
vs = ceiling(object.size(tmp)/1024^2)
518-
flog.debug('loaded family file: ',fam.dat[iy])
536+
flog.debug('loaded family file: %s',fam.dat[iy])
519537
flog.debug('current memory load in MB: %d',vs)
520538

521539

@@ -668,8 +686,8 @@ medium.test.ind <- function(dd=NULL){
668686
cwf = openxlsx::read.xlsx(system.file(package="psidR","psid-lists","psid.xlsx"))
669687
head_age_var_name <- getNamesPSID("ER17013", cwf, years=c(2003,2005,2007))
670688
educ = getNamesPSID("ER30323",cwf,years=c(2003,2005,2007))
671-
famvars = data.frame(year=c(2003,2005,2007),age=head_age_var_name)
672-
indvars = data.frame(year=c(2003,2005,2007),educ=educ)
689+
famvars = data.frame(year=c(2003,2005,2007),age=head_age_var_name$variable)
690+
indvars = data.frame(year=c(2003,2005,2007),educ=educ$variable)
673691
build.panel(fam.vars=famvars,ind.vars=indvars,datadir=dd)
674692
}
675693

@@ -680,7 +698,7 @@ medium.test.ind <- function(dd=NULL){
680698
medium.test.noind <- function(dd=NULL){
681699
cwf = openxlsx::read.xlsx(system.file(package="psidR","psid-lists","psid.xlsx"))
682700
head_age_var_name <- getNamesPSID("ER17013", cwf, years=c(2003,2005,2007))
683-
famvars = data.frame(year=c(2003,2005,2007),age=head_age_var_name)
701+
famvars = data.frame(year=c(2003,2005,2007),age=head_age_var_name$variable)
684702
build.panel(fam.vars=famvars,datadir=dd)
685703
}
686704

@@ -694,8 +712,8 @@ medium.test.ind.NA <- function(dd=NULL){
694712
head_age_var_name <- getNamesPSID("ER17013", cwf, years=c(2003,2005,2007))
695713
educ = getNamesPSID("ER30323",cwf,years=c(2003,2005,2007))
696714
educ[2] = NA
697-
famvars = data.frame(year=c(2003,2005,2007),age=head_age_var_name)
698-
indvars = data.frame(year=c(2003,2005,2007),educ=educ)
715+
famvars = data.frame(year=c(2003,2005,2007),age=head_age_var_name$variable)
716+
indvars = data.frame(year=c(2003,2005,2007),educ=educ$variable)
699717
build.panel(fam.vars=famvars,ind.vars=indvars,datadir=dd,loglevel = DEBUG)
700718
}
701719

readme.md

+35-9
Original file line numberDiff line numberDiff line change
@@ -45,12 +45,38 @@ The [Panel Study of Income Dynamics](http://psidonline.isr.umich.edu/) is a publ
4545
This package attempts to help with the task of building a panel dataset. The user directly downloads ASCII data from the PSID server into `R`, **without the need** for any other software like Stata or SAS. To build the panel, the user must then specify the variable names in each wave of the questionnaire in a data.frame `fam.vars`, as well as the variables from the individual index in `ind.vars`. The helper function `getNamesPSID` helps find the different variable names across waves - see examples below.
4646

4747

48-
### Quick Start
48+
### Quick Start and `API`
49+
50+
1. You must supply at least one data.frame with variables to read from the family file. Most of the time you will also supply a data.frame with variables from the individual files to read.
51+
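2. Those data.frames **must** be in the following format: column `year` is an integer indicating the calendar year, and the names of the other columns are the _variable names which will appear in your panel_. Each entry is the PSID variable code for that year (`NA` if the variable is not available in that year).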
52+
53+
```R
54+
> head(i) # individual file example
55+
year age educ empstat weight
56+
1: 1968 ER30004 ER30010 <NA> ER30019
57+
2: 1969 ER30023 <NA> <NA> ER30042 # NOTICE THE NA for educ HERE!!
58+
3: 1970 ER30046 ER30052 <NA> ER30066
59+
4: 1971 ER30070 ER30076 <NA> ER30090
60+
5: 1972 ER30094 ER30100 <NA> ER30116
61+
6: 1973 ER30120 ER30126 <NA> ER30137
62+
63+
> head(f) # family file example
64+
year age_youngest_child debt empstat_ faminc hours hvalue ...
65+
1: 1968 V120 <NA> V196 V81 V47 V5 ...
66+
2: 1969 V1013 <NA> V639 V529 V465 V449 ...
67+
3: 1970 V1243 <NA> V1278 V1514 V1138 V1122 ...
68+
4: 1971 V1946 <NA> V1983 V2226 V1839 V1823 ...
69+
5: 1972 V2546 <NA> V2581 V2852 V2439 V2423 ...
70+
6: 1973 V3099 <NA> V3114 V3256 V3027 V3021 ...
71+
```
72+
73+
Example usage:
74+
4975

5076
```R
5177
> library(psidR)
5278

53-
> build.psid(datadr = "~/data/PSID", small = TRUE) # directory `datadr` must exist!
79+
> build.psid(datadir = "~/data/PSID", small = TRUE) # directory `datadir` must exist!
5480
INFO [2021-07-13 10:34:26] Will download missing datasets now
5581
INFO [2021-07-13 10:34:26] will download family files: 2013, 2015
5682
INFO [2021-07-13 10:34:26] will download latest individual index: IND2019ER
@@ -133,7 +159,7 @@ i = fread(file.path(r,"psid-lists","indvars.txt"))
133159
613: PSID Family-level 2017 ER66163 A52 LIKELIHOOD OF MOVING likelihood_move
134160

135161
# alternatively, use `getNamesPSID`:
136-
# cwf <- read.xlsx("http://psidonline.isr.umich.edu/help/xyr/psid.xlsx")
162+
# cwf <- openxlsx::read.xlsx("http://psidonline.isr.umich.edu/help/xyr/psid.xlsx")
137163
# Suppose you know the name of the variable in a certain year, and it is
138164
# "ER17013". Then get the corresponding name in another year with
139165
# getNamesPSID("ER17013", cwf, years = 2001) # 2001 only
@@ -182,7 +208,7 @@ Here are some tests:
182208
cwf = openxlsx::read.xlsx(system.file(package="psidR","psid-lists","psid.xlsx"))
183209
head_age_var_name <- getNamesPSID("ER17013", cwf, years=c(2003))
184210
# create family vars data.frame
185-
famvars = data.frame(year=c(2003),age=head_age_var_name)
211+
famvars = data.frame(year=c(2003),variable=head_age_var_name$variable)
186212
# call function
187213
build.panel(fam.vars=famvars,datadir=dd)
188214

@@ -192,8 +218,8 @@ build.panel(fam.vars=famvars,datadir=dd)
192218
cwf = openxlsx::read.xlsx(system.file(package="psidR","psid-lists","psid.xlsx"))
193219
head_age_var_name <- getNamesPSID("ER17013", cwf, years=c(2003))
194220
educ = getNamesPSID("ER30323",cwf,years=2003)
195-
famvars = data.frame(year=c(2003),age=head_age_var_name)
196-
indvars = data.frame(year=c(2003),educ=educ)
221+
famvars = data.frame(year=c(2003),variable=head_age_var_name$variable)
222+
indvars = data.frame(year=c(2003),variable=educ$variable)
197223
build.panel(fam.vars=famvars,ind.vars=indvars,datadir=dd)
198224

199225

@@ -202,9 +228,9 @@ build.panel(fam.vars=famvars,ind.vars=indvars,datadir=dd)
202228

203229
cwf = openxlsx::read.xlsx(system.file(package="psidR","psid-lists","psid.xlsx"))
204230
head_age_var_name <- getNamesPSID("ER17013", cwf, years=c(2003,2005,2007))
205-
educ = getNamesPSID("ER30323",cwf,years=c(2003,2005,2007))
206-
famvars = data.frame(year=c(2003,2005,2007),age=head_age_var_name)
207-
indvars = data.frame(year=c(2003,2005,2007),educ=educ)
231+
educ = getNamesPSID("ER30323",cwf,years=c(2003,2005,2007))
232+
famvars = data.frame(year=c(2003,2005,2007),variable=head_age_var_name$variable)
233+
indvars = data.frame(year=c(2003,2005,2007),variable=educ$variable)
208234
build.panel(fam.vars=famvars,ind.vars=indvars,datadir=dd)
209235

210236
# etc for

tests/testthat/test_buildpanel.R

+12
Original file line numberDiff line numberDiff line change
@@ -114,4 +114,16 @@ test_that("check subsetting to core/immigrant/latino", {
114114

115115
} )
116116

117+
test_that("wrong famvars and indvars raises an error",{
118+
cwf <- openxlsx::read.xlsx("http://psidonline.isr.umich.edu/help/xyr/psid.xlsx")
119+
famvars = getNamesPSID("ER17013", cwf, years = c(2005, 2007, 2009))
120+
expect_error(build.panel(datadir=my.dir,fam.vars=famvars) )
121+
122+
head_age_var_name <- getNamesPSID("ER17013", cwf, years=c(2003))
123+
124+
famvars = data.frame(year=c(2005, 2007, 2009),age=head_age_var_name)
125+
expect_error(build.panel(datadir=my.dir,fam.vars=famvars) )
126+
127+
})
128+
117129

0 commit comments

Comments
 (0)