#include <bits/stdc++.h> using namespace std; #define ms(s, n) memset(s, n, sizeof(s)) #define FOR(i, a, b) for (int i = (a); i < (b); i++) #define FORd(i, a, b) for (int i = (a) - 1; i >= (b); i--) #define FORall(it, a) for (__typeof((a).begin()) it = (a).begin(); it != (a).end(); it++) #define sz(a) int((a).size()) #define present(t, x) (t.find(x) != t.end()) #define all(a) (a).begin(), (a).end() #define uni(a) (a).erase(unique(all(a)), (a).end()) #define pb push_back #define pf push_front #define mp make_pair #define fi first #define se second #define prec(n) fixed<<setprecision(n) #define bit(n, i) (((n) >> (i)) & 1) #define bitcount(n) __builtin_popcountll(n) typedef long long ll; typedef unsigned long long ull; typedef long double ld; typedef pair<int, int> pi; typedef vector<int> vi; typedef vector<pi> vii; const int MOD = (int) 1e9 + 7; const int MOD2 = 1007681537; const int INF = (int) 1e9; const ll LINF = (ll) 1e18; const ld PI = acos((ld) -1); const ld EPS = 1e-9; inline ll gcd(ll a, ll b) {ll r; while (b) {r = a % b; a = b; b = r;} return a;} inline ll lcm(ll a, ll b) {return a / gcd(a, b) * b;} inline ll fpow(ll n, ll k, int p = MOD) {ll r = 1; for (; k; k >>= 1) {if (k & 1) r = r * n % p; n = n * n % p;} return r;} template<class T> inline int chkmin(T& a, const T& val) {return val < a ? a = val, 1 : 0;} template<class T> inline int chkmax(T& a, const T& val) {return a < val ? a = val, 1 : 0;} inline ll isqrt(ll k) {ll r = sqrt(k) + 1; while (r * r > k) r--; return r;} inline ll icbrt(ll k) {ll r = cbrt(k) + 1; while (r * r * r > k) r--; return r;} inline void addmod(int& a, int val, int p = MOD) {if ((a = (a + val)) >= p) a -= p;} inline void submod(int& a, int val, int p = MOD) {if ((a = (a - val)) < 0) a += p;} inline int mult(int a, int b, int p = MOD) {return (ll) a * b % p;} inline int inv(int a, int p = MOD) {return fpow(a, p - 2, p);} inline int sign(ld x) {return x < -EPS ? -1 : x > +EPS;} inline int sign(ld x, ld y) {return sign(x - y);} #define db(x) cerr << #x << " = " << (x) << " "; #define endln cerr << "\n"; typedef long double db; struct cp { db x, y; cp(db x = 0, db y = 0) : x(x), y(y) {} cp operator + (const cp& rhs) {return cp(x + rhs.x, y + rhs.y);} cp operator - (const cp& rhs) {return cp(x - rhs.x, y - rhs.y);} cp operator * (const cp& rhs) {return cp(x * rhs.x - y * rhs.y, x * rhs.y + y * rhs.x);} cp operator !() {return cp(x, -y);} }; void fft(cp a[], int n, int invert) { static const db PI = acos((db) -1); cp w = cp(cos(2 * PI / n), sin(2 * PI / n)); if (invert) w = !w; for (int k = n / 2; k >= 1; k >>= 1, w = w * w) { cp t = cp(1, 0); for (int i = 0; i < k; i++, t = t * w) { for (int j = i; j + k < n; j += k + k) { cp x = a[j]; cp y = a[j + k]; a[j] = x + y; a[j + k] = (x - y) * t; } } } int k = __lg(n); for (int msk = 0; msk < n; msk++) { int nmsk = 0; for (int i = 0; i < k; i++) { if ((msk >> i) & 1) { nmsk |= 1 << k - i - 1; } } if (msk < nmsk) { swap(a[msk], a[nmsk]); } } if (invert) { for (int i = 0; i < n; i++) { a[i].x /= n; a[i].y /= n; } } } void multiply(int a[], int b[], int c[], int na, int nb, int mod = (int) 1e9 + 7) { static cp fa[1 << 20], fb[1 << 20]; static cp fc[1 << 20], fd[1 << 20]; int n = na + nb - 1; while (n != (n & -n)) n += n & -n; for (int i = 0; i < n; i++) fa[i] = fb[i] = cp(0, 0); for (int i = 0; i < na; i++) fa[i] = cp(a[i] >> 15, a[i] & (1 << 15) - 1); for (int i = 0; i < nb; i++) fb[i] = cp(b[i] >> 15, b[i] & (1 << 15) - 1); fft(fa, n, 0), fft(fb, n, 0); for (int i = 0; i < n; i++) { int j = (n - i) % n; cp x = fa[i] + !fa[j]; cp y = fb[i] + !fb[j]; cp z = !fa[j] - fa[i]; cp t = !fb[j] - fb[i]; fc[i] = (x * t + y * z) * cp(0, 0.25); fd[i] = x * y * cp(0, 0.25) + z * t * cp(-0.25, 0); } fft(fc, n, 1), fft(fd, n, 1); for (int i = 0; i < n; i++) { long long u = ((long long) floor(fc[i].x + 0.5)) % mod; long long v = ((long long) floor(fd[i].x + 0.5)) % mod; long long w = ((long long) floor(fd[i].y + 0.5)) % mod; c[i] = ((u << 15) + v + (w << 30)) % mod; } } const int maxn = 1.2e7 + 5; const int maxk = 5e4 + 5; int iv[maxn]; int fac[maxn]; int ifac[maxn]; int a[1 << 20]; int b[1 << 20]; int c[1 << 20]; int k, x, p, q; struct Matrix { static const int MAXN = 2; int x[MAXN][MAXN]; Matrix() { memset(x, 0, sizeof(x)); } int* operator [] (int r) { return x[r]; } static Matrix unit() { Matrix res; for (int i = 0; i < MAXN; i++) res[i][i] = 1; return res; } friend Matrix operator + (Matrix A, Matrix B) { Matrix res; for (int i = 0; i < MAXN; i++) for (int j = 0; j < MAXN; j++) { res[i][j] = A[i][j] + B[i][j]; if (res[i][j] >= p) res[i][j] -= p; } return res; } friend Matrix operator * (Matrix A, Matrix B) { Matrix res; for (int i = 0; i < MAXN; i++) for (int j = 0; j < MAXN; j++) { long long sqmod = (long long) p * p; long long sum = 0; for (int k = 0; k < MAXN; k++) { sum += (long long) A[i][k] * B[k][j]; if (sum >= sqmod) sum -= sqmod; } res[i][j] = sum % p; } return res; } friend Matrix operator ^ (Matrix A, long long k) { if (k == 0) return unit(); Matrix res, tmp; for (int i = 0; i < MAXN; i++) for (int j = 0; j < MAXN; j++) { res[i][j] = tmp[i][j] = A[i][j]; } k--; while (k) { if (k & 1) res = res * tmp; tmp = tmp * tmp; k >>= 1; } return res; } }; void build(int p) { iv[1] = 1; for (int i = 2; i < maxn; i++) { iv[i] = (p - (long long) (p / i) * iv[p % i] % p) % p; } fac[0] = 1; FOR(i, 1, maxn) fac[i] = (long long) fac[i - 1] * i % p; ifac[0] = 1; FOR(i, 1, maxn) ifac[i] = (long long) ifac[i - 1] * iv[i] % p; } int binom(int a, int b, int p) { if (a < 0 || a > b) return 0; return (long long) fac[b] * ifac[a] % p * ifac[b - a] % p; } int ispr(int k) { for (int i = 2; i * i <= k; i++) { if (k % i == 0) return 0; } return 1; } int ff(int x, int a, int k) { Matrix mat; mat[0][0] = a % p; mat[0][1] = mult(a, x, p); mat[1][0] = 1; mat = mat ^ k; int res = mult(mat[1][0], x + a, p); addmod(res, mat[1][1], p); return res; } void solve() { cin >> k >> x >> p >> q; assert(1 <= k && k <= 1e5); assert(1 <= x && x <= 1e9); assert(1e8 + 7 <= p && p <= 1e9 + 7 && ispr(p)); assert(1 <= q && q <= 500); build(p); int kk = k + 1 >> 1; FOR(i, 0, kk + 1) { b[i] = mult(fpow(p - 1, i + 1, p), ifac[i + 1], p); } FOR(i, 0, kk + 1) { int t = a[i]; a[i] = k > 1 ? mult(ff(p - i - 1, x, k - 2), x, p) : 1; if (i & 1) a[i] = p - a[i]; a[i] = mult(a[i], ifac[i], p); submod(a[i], t, p); for (int kk = 1; (i + 1) % kk == 0; kk <<= 1) { multiply(a + i - kk + 1, b + kk - 1, c, kk, kk, p); for (int j = i + 1; j < i + 2 * kk; j++) { a[j] = a[j] + c[j - i - 1]; if (a[j] >= p) a[j] -= p; } } } while (q--) { int l, d, t; cin >> l >> d >> t; assert(1 <= l && l <= d && d <= d + t - 1 && d + t - 1 <= 1e7); int res = 0; FOR(i, 0, kk + 1) { int z = binom(l + i + 1, (d + t - 1) + i + 1, p); submod(z, binom(l + i + 1, (d - 1) + i + 1, p), p); addmod(res, mult(z, mult(fac[l + i], a[i], p), p), p); } res = mult(res, ifac[l], p); cout << res << "\n"; } } int main(int argc, char* argv[]) { ios_base::sync_with_stdio(0), cin.tie(0); int JUDGE_ONLINE = 1; if (argc > 1) { JUDGE_ONLINE = 0; assert(freopen(argv[1], "r", stdin)); } if (argc > 2) { JUDGE_ONLINE = 0; assert(freopen(argv[2], "w", stdout)); } solve(); if (!JUDGE_ONLINE) { cerr << "\nTime elapsed: " << 1000 * clock() / CLOCKS_PER_SEC << "ms\n"; } return 0; }